Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit fe3147d

Browse files
authored
Merge pull request #1279 from PDeveze/Patchs-on-mtl-portals4
Patchs on mtl portals4
2 parents b21eea5 + 07fd547 commit fe3147d

File tree

6 files changed

+74
-33
lines changed

6 files changed

+74
-33
lines changed

ompi/mca/mtl/portals4/mtl_portals4.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ portals4_init_interface(void)
8686

8787
/* Create send and long message (read) portal table entries */
8888
ret = PtlPTAlloc(ompi_mtl_portals4.ni_h,
89+
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
90+
PTL_PT_FLOWCTRL |
91+
#endif
8992
PTL_PT_ONLY_USE_ONCE |
90-
PTL_PT_ONLY_TRUNCATE |
91-
PTL_PT_FLOWCTRL,
93+
PTL_PT_ONLY_TRUNCATE,
9294
ompi_mtl_portals4.recv_eq_h,
9395
REQ_RECV_TABLE_ID,
9496
&ompi_mtl_portals4.recv_idx);

ompi/mca/mtl/portals4/mtl_portals4.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,23 @@ struct mca_mtl_portals4_module_t {
4242
mca_mtl_base_module_t base;
4343

4444
/* add_procs() can get called multiple times. This prevents multiple calls to portals4_init_interface(). */
45-
int need_init;
45+
int32_t need_init;
4646

4747
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
48-
int use_logical;
48+
int32_t use_logical;
4949
/* Use flow control: 1 (true) : 0 (false) */
50-
int use_flowctl;
50+
int32_t use_flowctl;
5151

5252
/** Eager limit; messages greater than this use a rendezvous protocol */
53-
unsigned long long eager_limit;
53+
uint64_t eager_limit;
5454
/** Size of short message blocks */
55-
unsigned long long recv_short_size;
55+
uint64_t recv_short_size;
5656
/** Number of short message blocks which should be created during startup */
57-
int recv_short_num;
57+
uint32_t recv_short_num;
5858
/** Length of the send event queues */
59-
int send_queue_size;
59+
uint32_t send_queue_size;
6060
/** Length of the receive event queues */
61-
int recv_queue_size;
61+
uint32_t recv_queue_size;
6262
/** Protocol for long message transfer */
6363
enum { eager, rndv } protocol;
6464

ompi/mca/mtl/portals4/mtl_portals4_component.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ ompi_mtl_portals4_component_open(void)
224224
ompi_mtl_portals4.send_eq_h = PTL_INVALID_HANDLE;
225225
ompi_mtl_portals4.recv_eq_h = PTL_INVALID_HANDLE;
226226
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
227-
227+
ompi_mtl_portals4.send_md_h = PTL_INVALID_HANDLE;
228228
ompi_mtl_portals4.long_overflow_me_h = PTL_INVALID_HANDLE;
229229
ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL;
230230
ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL;
@@ -277,6 +277,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
277277
{
278278
int ret;
279279
ptl_process_t id;
280+
ptl_ni_limits_t actual_limits;
280281

281282
if (enable_mpi_threads && ompi_mpi_thread_multiple) {
282283
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -298,13 +299,13 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
298299
PTL_NI_LOGICAL | PTL_NI_MATCHING,
299300
PTL_PID_ANY,
300301
NULL,
301-
NULL,
302+
&actual_limits,
302303
&ompi_mtl_portals4.ni_h);
303304
else ret = PtlNIInit(PTL_IFACE_DEFAULT,
304305
PTL_NI_PHYSICAL | PTL_NI_MATCHING,
305306
PTL_PID_ANY,
306307
NULL,
307-
NULL,
308+
&actual_limits,
308309
&ompi_mtl_portals4.ni_h);
309310
if (PTL_OK != ret) {
310311
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -313,6 +314,25 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
313314
goto error;
314315
}
315316

317+
if (ompi_comm_rank(MPI_COMM_WORLD) == 0) {
318+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_entries=%d", actual_limits.max_entries);
319+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_unexpected_headers=%d", actual_limits.max_unexpected_headers);
320+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_mds=%d", actual_limits.max_mds);
321+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_eqs=%d", actual_limits.max_eqs);
322+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_cts=%d", actual_limits.max_cts);
323+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_pt_index=%d", actual_limits.max_pt_index);
324+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_iovecs=%d", actual_limits.max_iovecs);
325+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_list_size=%d", actual_limits.max_list_size);
326+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_triggered_ops=%d", actual_limits.max_triggered_ops);
327+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_msg_size=%ld", actual_limits.max_msg_size);
328+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_atomic_size=%ld", actual_limits.max_atomic_size);
329+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_fetch_atomic_size=%ld", actual_limits.max_fetch_atomic_size);
330+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_waw_ordered_size=%ld", actual_limits.max_waw_ordered_size);
331+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_war_ordered_size=%ld", actual_limits.max_war_ordered_size);
332+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_volatile_size=%ld", actual_limits.max_volatile_size);
333+
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "features=%u", actual_limits.features);
334+
}
335+
316336
ret = PtlGetUid(ompi_mtl_portals4.ni_h, &ompi_mtl_portals4.uid);
317337
if (PTL_OK != ret) {
318338
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
@@ -344,6 +364,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
344364
"My nid,pid = %x,%x",
345365
id.phys.nid, id.phys.pid));
346366

367+
ompi_mtl_portals4.base.mtl_max_tag = MTL_PORTALS4_MAX_TAG;
347368
return &ompi_mtl_portals4.base;
348369

349370
error:

ompi/mca/mtl/portals4/mtl_portals4_flowctl.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,16 @@ ompi_mtl_portals4_flowctl_init(void)
219219
int
220220
ompi_mtl_portals4_flowctl_fini(void)
221221
{
222-
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx);
223-
PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h);
224222
PtlMEUnlink(ompi_mtl_portals4.flowctl.trigger_me_h);
225-
PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h);
223+
PtlCTFree(ompi_mtl_portals4.flowctl.trigger_ct_h);
226224
PtlMEUnlink(ompi_mtl_portals4.flowctl.alert_me_h);
227-
PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h);
225+
PtlCTFree(ompi_mtl_portals4.flowctl.alert_ct_h);
228226
PtlMEUnlink(ompi_mtl_portals4.flowctl.fanin_me_h);
229-
PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h);
227+
PtlCTFree(ompi_mtl_portals4.flowctl.fanin_ct_h);
230228
PtlMEUnlink(ompi_mtl_portals4.flowctl.fanout_me_h);
229+
PtlCTFree(ompi_mtl_portals4.flowctl.fanout_ct_h);
230+
231+
PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.flowctl_idx);
231232

232233
return OMPI_SUCCESS;
233234
}

ompi/mca/mtl/portals4/mtl_portals4_recv.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
9191
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
9292
"%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
9393
__FILE__, __LINE__, ev->ni_fail_type);
94+
ret = PTL_FAIL;
9495
goto callback_error;
9596
}
9697

@@ -118,7 +119,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
118119
to pull the second part of the message. */
119120
ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
120121
((msg_length > ptl_request->delivery_len) ?
121-
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
122+
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
122123
ev->initiator,
123124
ev->hdr_data,
124125
ompi_mtl_portals4.eager_limit,
@@ -159,6 +160,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
159160
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
160161
"%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
161162
__FILE__, __LINE__, ev->ni_fail_type);
163+
ret = PTL_FAIL;
162164
goto callback_error;
163165
}
164166

@@ -204,6 +206,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
204206
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
205207
"%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
206208
__FILE__, __LINE__, ev->ni_fail_type);
209+
ret = PTL_FAIL;
207210
goto callback_error;
208211
}
209212

@@ -285,7 +288,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
285288

286289
ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength,
287290
((msg_length > ptl_request->delivery_len) ?
288-
ptl_request->delivery_len : msg_length) - ev->mlength,
291+
ptl_request->delivery_len : msg_length) - ev->mlength,
289292
ev->initiator,
290293
ev->hdr_data,
291294
ev->mlength,

ompi/mca/mtl/portals4/mtl_portals4_recv_short.c

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static int
3737
ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
3838
ompi_mtl_portals4_base_request_t* ptl_base_request)
3939
{
40+
int ret = OMPI_SUCCESS;
4041
ompi_mtl_portals4_recv_short_request_t *ptl_request =
4142
(ompi_mtl_portals4_recv_short_request_t*) ptl_base_request;
4243
ompi_mtl_portals4_recv_short_block_t *block = ptl_request->block;
@@ -59,10 +60,10 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
5960
opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks,
6061
&block->base);
6162
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
62-
ompi_mtl_portals4_recv_short_block_free(block);
63+
ret = ompi_mtl_portals4_recv_short_block_free(block);
6364
} else {
6465
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
65-
ompi_mtl_portals4_activate_block(block);
66+
ret = ompi_mtl_portals4_activate_block(block);
6667
}
6768
break;
6869

@@ -85,6 +86,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
8586
break;
8687

8788
case PTL_EVENT_AUTO_UNLINK:
89+
block->me_h = PTL_INVALID_HANDLE;
8890
#if OMPI_ENABLE_THREAD_MULTIPLE
8991
OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex);
9092
switch (block->status) {
@@ -99,12 +101,12 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
99101
opal_list_remove_item(&ompi_mtl_portals4.recv_short_blocks,
100102
&block->base);
101103
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
102-
ompi_mtl_portals4_recv_short_block_free(block);
104+
ret = ompi_mtl_portals4_recv_short_block_free(block);
103105
} else {
104106
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
105107
OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output,
106108
"mtl:portals4 PTL_EVENT_AUTO_UNLINK received after PTL_EVENT_AUTO_FREE"));
107-
ompi_mtl_portals4_activate_block(block);
109+
ret = ompi_mtl_portals4_activate_block(block);
108110
}
109111
break;
110112

@@ -150,7 +152,7 @@ ompi_mtl_portals4_recv_block_progress(ptl_event_t *ev,
150152
break;
151153
}
152154

153-
return OMPI_SUCCESS;
155+
return ret;
154156
}
155157

156158

@@ -210,6 +212,7 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
210212
me.uid = ompi_mtl_portals4.uid;
211213
me.options =
212214
PTL_ME_OP_PUT |
215+
PTL_ME_EVENT_COMM_DISABLE |
213216
PTL_ME_MANAGE_LOCAL |
214217
PTL_ME_MAY_ALIGN;
215218
if (ompi_mtl_portals4.use_logical) {
@@ -244,7 +247,8 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
244247
int
245248
ompi_mtl_portals4_recv_short_init(void)
246249
{
247-
int i;
250+
int ret = OMPI_SUCCESS;
251+
uint32_t i;
248252

249253
OBJ_CONSTRUCT(&ompi_mtl_portals4.short_block_mutex, opal_mutex_t);
250254
OBJ_CONSTRUCT(&(ompi_mtl_portals4.recv_short_blocks), opal_list_t);
@@ -258,46 +262,56 @@ ompi_mtl_portals4_recv_short_init(void)
258262
}
259263
opal_list_append(&ompi_mtl_portals4.recv_short_blocks,
260264
&block->base);
261-
ompi_mtl_portals4_activate_block(block);
265+
ret = ompi_mtl_portals4_activate_block(block);
262266
}
263267

264-
return OMPI_SUCCESS;
268+
return ret;
265269
}
266270

267271

268272
int
269273
ompi_mtl_portals4_recv_short_fini(void)
270274
{
271275
opal_list_item_t *item;
276+
int ret = OMPI_SUCCESS;
272277

273278
OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex);
274279
while (NULL != (item = opal_list_remove_first(&ompi_mtl_portals4.recv_short_blocks))) {
275280
ompi_mtl_portals4_recv_short_block_t *block =
276281
(ompi_mtl_portals4_recv_short_block_t*) item;
277-
ompi_mtl_portals4_recv_short_block_free(block);
282+
ret = ompi_mtl_portals4_recv_short_block_free(block);
283+
ompi_mtl_portals4.active_recv_short_blocks--;
278284
}
279285
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
280286

281-
return OMPI_SUCCESS;
287+
return ret;
282288
}
283289

284290

285291
int
286292
ompi_mtl_portals4_recv_short_link(int count)
287293
{
294+
int ret = OMPI_SUCCESS;
288295
int active = ompi_mtl_portals4.active_recv_short_blocks;
289296
int i;
290297

291298
if (active < count) {
292299
for (i = 0 ; i < (count - active) ; ++i) {
293300
ompi_mtl_portals4_recv_short_block_t *block =
294-
ompi_mtl_portals4_recv_short_block_alloc(false);
301+
ompi_mtl_portals4_recv_short_block_alloc(true);
295302
if (NULL == block) {
296303
return OMPI_ERR_OUT_OF_RESOURCE;
297304
}
298-
ompi_mtl_portals4_activate_block(block);
305+
OPAL_THREAD_LOCK(&ompi_mtl_portals4.short_block_mutex);
306+
opal_list_append(&ompi_mtl_portals4.recv_short_blocks,
307+
&block->base);
308+
OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output,
309+
"recv_short_link: total=%d active=%d",
310+
(int) opal_list_get_size(&ompi_mtl_portals4.recv_short_blocks), ompi_mtl_portals4.active_recv_short_blocks));
311+
OPAL_THREAD_UNLOCK(&ompi_mtl_portals4.short_block_mutex);
312+
ret = ompi_mtl_portals4_activate_block(block);
299313
}
300314
}
301315

302-
return OMPI_SUCCESS;
316+
return ret;
303317
}

0 commit comments

Comments
 (0)