Skip to content

Commit c5cf343

Browse files
committed
OSC rdma win allocate: synchronize error codes across shared memory group
Signed-off-by: Joseph Schuchart <[email protected]> (cherry picked from commit 8f27cc2)
1 parent 73c4aac commit c5cf343

File tree

1 file changed

+52
-39
lines changed

1 file changed

+52
-39
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 52 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -524,6 +524,19 @@ struct _local_data {
524524
size_t size;
525525
};
526526

527+
static int synchronize_errorcode(int errorcode, ompi_communicator_t *comm)
528+
{
529+
int ret; /* status returned by the allreduce call itself */
530+
int err = errorcode; /* the caller's code; overwritten in place by the reduction below */
531+
/* Agree on a single code across comm: the in-place MPI_MIN allreduce leaves in err the smallest value contributed by any caller. This assumes that error codes are negative integers, so a failure reported anywhere wins over success (NOTE(review): relies on OMPI_SUCCESS comparing greater than every error code - confirm). */
532+
ret = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &err, 1, MPI_INT, MPI_MIN,
533+
comm, comm->c_coll->coll_allreduce_module);
534+
if (OPAL_UNLIKELY (OMPI_SUCCESS != ret)) {
535+
err = ret; /* the reduction itself failed: return its status instead of the reduced value */
536+
}
537+
return err;
538+
}
539+
527540
static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, size_t size)
528541
{
529542
ompi_communicator_t *shared_comm;
@@ -595,28 +608,35 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
595608
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
596609
if (0 > ret) {
597610
ret = OMPI_ERR_OUT_OF_RESOURCE;
598-
break;
611+
} else {
612+
/* allocate enough space for the state + data for all local ranks */
613+
ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size);
614+
free (data_file);
615+
if (OPAL_SUCCESS != ret) {
616+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment");
617+
}
599618
}
619+
}
600620

601-
/* allocate enough space for the state + data for all local ranks */
602-
ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size);
603-
free (data_file);
604-
if (OPAL_SUCCESS != ret) {
605-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment");
606-
break;
607-
}
621+
ret = synchronize_errorcode(ret, shared_comm);
622+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
623+
break;
608624
}
609625

610-
ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
626+
ret = shared_comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
611627
shared_comm, shared_comm->c_coll->coll_bcast_module);
612-
if (OMPI_SUCCESS != ret) {
628+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
613629
break;
614630
}
615631

616632
module->segment_base = opal_shmem_segment_attach (&module->seg_ds);
617633
if (NULL == module->segment_base) {
618634
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to attach to the shared memory segment");
619635
ret = OPAL_ERROR;
636+
}
637+
638+
ret = synchronize_errorcode(ret, shared_comm);
639+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
620640
break;
621641
}
622642

@@ -636,35 +656,28 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
636656
memset (module->state, 0, module->state_size);
637657

638658
if (0 == local_rank) {
659+
/* unlink the shared memory backing file */
660+
opal_shmem_unlink (&module->seg_ds);
639661
/* just go ahead and register the whole segment */
640662
ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, module->segment_base, total_size, MCA_BTL_REG_FLAG_ACCESS_ANY,
641663
&module->state_handle);
642-
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
643-
break;
664+
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
665+
state_region->base = (intptr_t) module->segment_base;
666+
if (module->state_handle) {
667+
memcpy (state_region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size);
668+
}
644669
}
670+
}
645671

646-
state_region->base = (intptr_t) module->segment_base;
647-
if (module->state_handle) {
648-
memcpy (state_region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size);
649-
}
672+
/* synchronization to make sure memory is registered */
673+
ret = synchronize_errorcode(ret, shared_comm);
674+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
675+
break;
650676
}
651677

652678
if (MPI_WIN_FLAVOR_CREATE == module->flavor) {
653679
ret = ompi_osc_rdma_initialize_region (module, base, size);
654-
if (OMPI_SUCCESS != ret) {
655-
break;
656-
}
657-
}
658-
659-
/* barrier to make sure all ranks have attached */
660-
shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module);
661-
662-
/* unlink the shared memory backing file */
663-
if (0 == local_rank) {
664-
opal_shmem_unlink (&module->seg_ds);
665-
}
666-
667-
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
680+
} else if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
668681
ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions;
669682
module->state->disp_unit = module->disp_unit;
670683
module->state->region_count = 1;
@@ -675,8 +688,11 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
675688
}
676689
}
677690

678-
/* barrier to make sure all ranks have set up their region data */
679-
shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module);
691+
/* synchronization to make sure all ranks have set up their region data */
692+
ret = synchronize_errorcode(ret, shared_comm);
693+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
694+
break;
695+
}
680696

681697
offset = data_base;
682698
for (int i = 0 ; i < local_size ; ++i) {
@@ -995,13 +1011,7 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module)
9951011
free (temp);
9961012
} while (0);
9971013

998-
999-
ret = module->comm->c_coll->coll_allreduce (&ret, &global_result, 1, MPI_INT, MPI_MIN, module->comm,
1000-
module->comm->c_coll->coll_allreduce_module);
1001-
1002-
if (OMPI_SUCCESS != ret) {
1003-
global_result = ret;
1004-
}
1014+
global_result = synchronize_errorcode(ret, module->comm);
10051015

10061016
/* none of these communicators are needed anymore so free them now*/
10071017
if (MPI_COMM_NULL != module->local_leaders) {
@@ -1236,6 +1246,9 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
12361246

12371247
/* fill in our part */
12381248
ret = allocate_state_shared (module, base, size);
1249+
1250+
/* notify all others if something went wrong */
1251+
ret = synchronize_errorcode(ret, module->comm);
12391252
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
12401253
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to allocate internal state");
12411254
ompi_osc_rdma_free (win);

0 commit comments

Comments
 (0)