Skip to content

Commit fc55b19

Browse files
committed
MTL PSM2: add a thread lock while peeking and completing the psm2
requests. Reviewed-by: Gopalakrishnan, Aravind <[email protected]> Signed-off-by: Matias Cabral <[email protected]> (cherry picked from commit b81bcd4) Signed-off-by: Matias Cabral <[email protected]>
1 parent 313f61d commit fc55b19

File tree

3 files changed

+46
-38
lines changed

3 files changed

+46
-38
lines changed

ompi/mca/mtl/psm2/mtl_psm2.c

Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -406,58 +406,62 @@ int ompi_mtl_psm2_progress( void ) {
406406
int completed = 1;
407407

408408
do {
409+
OPAL_THREAD_LOCK(&mtl_psm2_mq_mutex);
409410
err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL);
410-
if (err == PSM2_MQ_INCOMPLETE) {
411-
return completed;
412-
} else if (err != PSM2_OK) {
413-
goto error;
414-
}
411+
if (err == PSM2_MQ_INCOMPLETE) {
412+
OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
413+
return completed;
414+
} else if (OPAL_UNLIKELY(err != PSM2_OK)) {
415+
OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
416+
goto error;
417+
}
415418

416-
completed++;
419+
err = psm2_mq_test2(&req, &psm2_status);
420+
OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
417421

418-
err = psm2_mq_test2(&req, &psm2_status);
419-
if (err != PSM2_OK) {
420-
goto error;
421-
}
422+
if (OPAL_UNLIKELY (err != PSM2_OK)) {
423+
goto error;
424+
}
425+
426+
completed++;
422427

423428
mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context;
424429

425-
if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {
430+
if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {
426431

427-
mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
428-
psm2_status.msg_tag.tag1;
429-
mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
430-
psm2_status.msg_tag.tag0;
432+
mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
433+
psm2_status.msg_tag.tag1;
434+
mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
435+
psm2_status.msg_tag.tag0;
431436
mtl_psm2_request->super.ompi_req->req_status._ucount =
432437
psm2_status.nbytes;
433438

434439
ompi_mtl_datatype_unpack(mtl_psm2_request->convertor,
435-
mtl_psm2_request->buf,
436-
psm2_status.msg_length);
437-
}
438-
439-
if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
440-
if (mtl_psm2_request->free_after) {
441-
free(mtl_psm2_request->buf);
442-
}
443-
}
440+
mtl_psm2_request->buf,
441+
psm2_status.msg_length);
442+
}
444443

445-
switch (psm2_status.error_code) {
446-
case PSM2_OK:
447-
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
448-
OMPI_SUCCESS;
449-
break;
450-
case PSM2_MQ_TRUNCATION:
451-
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
452-
MPI_ERR_TRUNCATE;
453-
break;
454-
default:
455-
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
456-
MPI_ERR_INTERN;
457-
}
444+
if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
445+
if (mtl_psm2_request->free_after) {
446+
free(mtl_psm2_request->buf);
447+
}
448+
}
458449

459-
mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
450+
switch (psm2_status.error_code) {
451+
case PSM2_OK:
452+
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
453+
OMPI_SUCCESS;
454+
break;
455+
case PSM2_MQ_TRUNCATION:
456+
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
457+
MPI_ERR_TRUNCATE;
458+
break;
459+
default:
460+
mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
461+
MPI_ERR_INTERN;
462+
}
460463

464+
mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
461465
}
462466
while (1);
463467

ompi/mca/mtl/psm2/mtl_psm2.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
BEGIN_C_DECLS
3636

37+
/* MPI_THREAD_MULTIPLE_SUPPORT */
38+
extern opal_mutex_t mtl_psm2_mq_mutex;
3739

3840
/* MTL interface functions */
3941
extern int ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t* mtl,

ompi/mca/mtl/psm2/mtl_psm2_component.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
#include <glob.h>
4343

4444
static int param_priority;
45+
/* MPI_THREAD_MULTIPLE_SUPPORT */
46+
opal_mutex_t mtl_psm2_mq_mutex = OPAL_MUTEX_STATIC_INIT;
4547

4648
static int ompi_mtl_psm2_component_open(void);
4749
static int ompi_mtl_psm2_component_close(void);

0 commit comments

Comments
 (0)