Skip to content

Commit b6b16a6

Browse files
committed
osc/rdma: Clean up initialization debugging
Clean up language around btl selection phases to match rest of the code (accelerated/alternate). Also switch component initialization code to use opal_output_verbose rather than OSC_RDMA_VERBOSE, so that the debugging prints can be enabled on a non-debug build. Signed-off-by: Brian Barrett <[email protected]>
1 parent 8fedcc0 commit b6b16a6

File tree

1 file changed

+46
-26
lines changed

1 file changed

+46
-26
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,8 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
442442
int ret, my_rank;
443443
size_t memory_alignment = module->memory_alignment;
444444

445-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating private internal state");
445+
opal_output_verbose(MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output,
446+
"allocating private internal state");
446447

447448
my_rank = ompi_comm_rank (module->comm);
448449

@@ -598,7 +599,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
598599
return allocate_state_single (module, base, size);
599600
}
600601

601-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocating shared internal state");
602+
opal_output_verbose(MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output,
603+
"allocating shared internal state");
602604

603605
local_rank_array_size = sizeof (ompi_osc_rdma_rank_data_t) * RANK_ARRAY_COUNT (module);
604606
leader_peer_data_size = module->region_size * module->node_count;
@@ -654,7 +656,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
654656
ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size);
655657
free (data_file);
656658
if (OPAL_SUCCESS != ret) {
657-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment");
659+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
660+
"failed to create shared memory segment");
658661
}
659662
}
660663
}
@@ -672,7 +675,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
672675

673676
module->segment_base = opal_shmem_segment_attach (&module->seg_ds);
674677
if (NULL == module->segment_base) {
675-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to attach to the shared memory segment");
678+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
679+
"failed to attach to the shared memory segment");
676680
ret = OPAL_ERROR;
677681
}
678682

@@ -898,7 +902,8 @@ static int ompi_osc_rdma_query_alternate_btls (ompi_communicator_t *comm, ompi_o
898902

899903
btls_to_use = opal_argv_split (ompi_osc_rdma_btl_alternate_names, ',');
900904
if (NULL == btls_to_use) {
901-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no alternate BTLs requested: %s", ompi_osc_rdma_btl_alternate_names);
905+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
906+
"no alternate BTLs requested: %s", ompi_osc_rdma_btl_alternate_names);
902907
return OMPI_ERR_UNREACH;
903908
}
904909

@@ -908,20 +913,23 @@ static int ompi_osc_rdma_query_alternate_btls (ompi_communicator_t *comm, ompi_o
908913

909914
/* rdma and atomics are only supported with BTLs at the moment */
910915
for (int i = 0 ; btls_to_use[i] ; ++i) {
911-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "checking for btl %s", btls_to_use[i]);
916+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output, "checking for btl %s", btls_to_use[i]);
912917
OPAL_LIST_FOREACH(item, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
913918
if (NULL != item->btl_module->btl_register_mem) {
914-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "skipping RDMA btl when searching for alternate BTL");
919+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
920+
"skipping RDMA btl when searching for alternate BTL");
915921
continue;
916922
}
917923

918924
if (0 != strcmp (btls_to_use[i], item->btl_module->btl_component->btl_version.mca_component_name)) {
919-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "skipping btl %s",
920-
item->btl_module->btl_component->btl_version.mca_component_name);
925+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
926+
"skipping btl %s",
927+
item->btl_module->btl_component->btl_version.mca_component_name);
921928
continue;
922929
}
923930

924-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "found alternate btl %s", btls_to_use[i]);
931+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
932+
"found alternate btl %s", btls_to_use[i]);
925933

926934
++btls_found;
927935
if (module) {
@@ -1089,7 +1097,8 @@ static int ompi_osc_rdma_query_accelerated_btls (ompi_communicator_t *comm, ompi
10891097
}
10901098

10911099
if (NULL == selected_btl) {
1092-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no suitable btls found");
1100+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
1101+
"accelerated_query: no suitable btls found");
10931102
return OMPI_ERR_NOT_AVAILABLE;
10941103
}
10951104

@@ -1100,8 +1109,9 @@ static int ompi_osc_rdma_query_accelerated_btls (ompi_communicator_t *comm, ompi
11001109
module->use_memory_registration = selected_btl->btl_register_mem != NULL;
11011110
}
11021111

1103-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "selected btl: %s",
1104-
selected_btl->btl_component->btl_version.mca_component_name);
1112+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
1113+
"accelerated_query: selected btl: %s",
1114+
selected_btl->btl_component->btl_version.mca_component_name);
11051115

11061116
return OMPI_SUCCESS;
11071117
}
@@ -1150,7 +1160,8 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module)
11501160
module->region_size, MPI_BYTE, module->local_leaders,
11511161
module->local_leaders->c_coll->coll_allgather_module);
11521162
if (OMPI_SUCCESS != ret) {
1153-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "leader allgather failed with ompi error code %d", ret);
1163+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
1164+
"leader allgather failed with ompi error code %d", ret);
11541165
break;
11551166
}
11561167
}
@@ -1193,7 +1204,8 @@ static int ompi_osc_rdma_create_groups (ompi_osc_rdma_module_t *module)
11931204
/* create a shared communicator to handle communication about the local segment */
11941205
ret = ompi_comm_split_type (module->comm, MPI_COMM_TYPE_SHARED, 0, NULL, &module->shared_comm);
11951206
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1196-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create a shared memory communicator. error code %d", ret);
1207+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
1208+
"failed to create a shared memory communicator. error code %d", ret);
11971209
return ret;
11981210
}
11991211

@@ -1204,7 +1216,8 @@ static int ompi_osc_rdma_create_groups (ompi_osc_rdma_module_t *module)
12041216
ret = ompi_comm_split (module->comm, (0 == local_rank) ? 0 : MPI_UNDEFINED, comm_rank, &module->local_leaders,
12051217
false);
12061218
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1207-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create local leaders communicator. error code %d", ret);
1219+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
1220+
"failed to create local leaders communicator. error code %d", ret);
12081221
return ret;
12091222
}
12101223

@@ -1217,7 +1230,8 @@ static int ompi_osc_rdma_create_groups (ompi_osc_rdma_module_t *module)
12171230
ret = module->shared_comm->c_coll->coll_bcast (values, 2, MPI_INT, 0, module->shared_comm,
12181231
module->shared_comm->c_coll->coll_bcast_module);
12191232
if (OMPI_SUCCESS != ret) {
1220-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to broadcast local data. error code %d", ret);
1233+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
1234+
"failed to broadcast local data. error code %d", ret);
12211235
return ret;
12221236
}
12231237
}
@@ -1350,8 +1364,9 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
13501364
return ret;
13511365
}
13521366

1353-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "creating osc/rdma window of flavor %d with id %s",
1354-
flavor, ompi_comm_print_cid (module->comm));
1367+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
1368+
"creating osc/rdma window of flavor %d with id %s",
1369+
flavor, ompi_comm_print_cid (module->comm));
13551370

13561371
/* peer data */
13571372
if (world_size > init_limit) {
@@ -1372,11 +1387,13 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
13721387
/* find rdma capable endpoints */
13731388
ret = ompi_osc_rdma_query_accelerated_btls (module->comm, module);
13741389
if (OMPI_SUCCESS != ret) {
1375-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_WARN, "could not find a suitable btl. falling back on "
1376-
"active-message BTLs");
1390+
opal_output_verbose(MCA_BASE_VERBOSE_WARN, ompi_osc_base_framework.framework_output,
1391+
"could not find an accelerated btl. falling back on "
1392+
"active-message BTLs");
13771393
ret = ompi_osc_rdma_query_alternate_btls (module->comm, module);
13781394
if (OMPI_SUCCESS != ret) {
1379-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_WARN, "no BTL available for RMA window");
1395+
opal_output_verbose(MCA_BASE_VERBOSE_WARN, ompi_osc_base_framework.framework_output,
1396+
"no BTL available for RMA window");
13801397
ompi_osc_rdma_free (win);
13811398
return ret;
13821399
}
@@ -1428,7 +1445,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
14281445
/* notify all others if something went wrong */
14291446
ret = synchronize_errorcode(ret, module->comm);
14301447
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1431-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to allocate internal state");
1448+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
1449+
"failed to allocate internal state");
14321450
ompi_osc_rdma_free (win);
14331451
return ret;
14341452
}
@@ -1479,14 +1497,16 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
14791497

14801498
ret = ompi_osc_rdma_share_data (module);
14811499
if (OMPI_SUCCESS != ret) {
1482-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to share window data with peers");
1500+
opal_output_verbose(MCA_BASE_VERBOSE_ERROR, ompi_osc_base_framework.framework_output,
1501+
"failed to share window data with peers");
14831502
ompi_osc_rdma_free (win);
14841503
} else {
14851504
/* for now the leader is always rank 0 in the communicator */
14861505
module->leader = ompi_osc_rdma_module_peer (module, 0);
14871506

1488-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %s",
1489-
ompi_comm_print_cid(module->comm));
1507+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
1508+
"finished creating osc/rdma window with id %s",
1509+
ompi_comm_print_cid(module->comm));
14901510
}
14911511

14921512
return ret;

0 commit comments

Comments
 (0)