Skip to content

Commit d6cb658

Browse files
bosilca authored and Minh Quan Ho committed
Allow UCC to be used with sessions
And other instances where OMPI CIDs are not global. In this case, OMPI maintains a translation table for each communicator, but this table is not exposed to other software layers (such as UCC). As a result, UCC must be coerced to create a unique ID for the team by itself. Signed-off-by: George Bosilca <[email protected]>
1 parent 5e445de commit d6cb658

File tree

2 files changed

+26
-15
lines changed

2 files changed

+26
-15
lines changed

ompi/communicator/comm.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2528,7 +2528,7 @@ int ompi_comm_determine_first_auto ( ompi_communicator_t* intercomm )
25282528
/********************************************************************************/
25292529
int ompi_comm_dump ( ompi_communicator_t *comm )
25302530
{
2531-
opal_output(0, "Dumping information for comm_cid %s\n", ompi_comm_print_cid (comm));
2531+
opal_output(0, "Dumping information for comm_cid %s : %d\n", ompi_comm_print_cid (comm), ompi_comm_get_local_cid(comm));
25322532
opal_output(0," f2c index:%d cube_dim: %d\n", comm->c_f_to_c_index,
25332533
comm->c_cube_dim);
25342534
opal_output(0," Local group: size = %d my_rank = %d\n",
@@ -2539,13 +2539,17 @@ int ompi_comm_dump ( ompi_communicator_t *comm )
25392539
/* Display flags */
25402540
if ( OMPI_COMM_IS_INTER(comm) )
25412541
opal_output(0," inter-comm,");
2542+
else
2543+
opal_output(0," intra-comm,");
25422544
if ( OMPI_COMM_IS_CART(comm))
25432545
opal_output(0," topo-cart");
25442546
else if ( OMPI_COMM_IS_GRAPH(comm))
25452547
opal_output(0," topo-graph");
25462548
else if ( OMPI_COMM_IS_DIST_GRAPH(comm))
25472549
opal_output(0," topo-dist-graph");
2548-
opal_output(0,"\n");
2550+
else
2551+
opal_output(0, " no topo");
2552+
opal_output(0,"\n");
25492553

25502554
if (OMPI_COMM_IS_INTER(comm)) {
25512555
opal_output(0," Remote group size:%d\n", comm->c_remote_group->grp_proc_count);

ompi/mca/coll/ucc/coll_ucc_module.c

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "ompi_config.h"
1515
#include "coll_ucc.h"
16+
#include "coll_ucc_common.h"
1617
#include "coll_ucc_dtypes.h"
1718
#include "ompi/mca/coll/base/coll_tags.h"
1819
#include "ompi/mca/pml/pml.h"
@@ -219,7 +220,8 @@ static ucc_status_t oob_allgather(void *sbuf, void *rbuf, size_t msglen,
219220
}
220221

221222

222-
static int mca_coll_ucc_init_ctx() {
223+
static int mca_coll_ucc_init_ctx(ompi_communicator_t* comm)
224+
{
223225
mca_coll_ucc_component_t *cm = &mca_coll_ucc_component;
224226
char str_buf[256];
225227
ompi_attribute_fn_ptr_union_t del_fn;
@@ -270,9 +272,9 @@ static int mca_coll_ucc_init_ctx() {
270272
ctx_params.oob.allgather = oob_allgather;
271273
ctx_params.oob.req_test = oob_allgather_test;
272274
ctx_params.oob.req_free = oob_allgather_free;
273-
ctx_params.oob.coll_info = (void*)MPI_COMM_WORLD;
274-
ctx_params.oob.n_oob_eps = ompi_comm_size(&ompi_mpi_comm_world.comm);
275-
ctx_params.oob.oob_ep = ompi_comm_rank(&ompi_mpi_comm_world.comm);
275+
ctx_params.oob.coll_info = (void*)comm;
276+
ctx_params.oob.n_oob_eps = ompi_comm_size(comm);
277+
ctx_params.oob.oob_ep = ompi_comm_rank(comm);
276278
if (UCC_OK != ucc_context_config_read(cm->ucc_lib, NULL, &ctx_config)) {
277279
UCC_ERROR("UCC context config read failed");
278280
goto cleanup_lib;
@@ -329,7 +331,7 @@ static int mca_coll_ucc_init_ctx() {
329331
return OMPI_ERROR;
330332
}
331333

332-
uint64_t rank_map_cb(uint64_t ep, void *cb_ctx)
334+
static uint64_t rank_map_cb(uint64_t ep, void *cb_ctx)
333335
{
334336
struct ompi_communicator_t *comm = cb_ctx;
335337

@@ -433,8 +435,7 @@ static int mca_coll_ucc_module_enable(mca_coll_base_module_t *module,
433435
ucc_team_params_t team_params = {
434436
.mask = UCC_TEAM_PARAM_FIELD_EP_MAP |
435437
UCC_TEAM_PARAM_FIELD_EP |
436-
UCC_TEAM_PARAM_FIELD_EP_RANGE |
437-
UCC_TEAM_PARAM_FIELD_ID,
438+
UCC_TEAM_PARAM_FIELD_EP_RANGE,
438439
.ep_map = {
439440
.type = (comm == &ompi_mpi_comm_world.comm) ?
440441
UCC_EP_MAP_FULL : UCC_EP_MAP_CB,
@@ -443,12 +444,18 @@ static int mca_coll_ucc_module_enable(mca_coll_base_module_t *module,
443444
.cb.cb_ctx = (void*)comm
444445
},
445446
.ep = ompi_comm_rank(comm),
446-
.ep_range = UCC_COLLECTIVE_EP_RANGE_CONTIG,
447-
.id = ompi_comm_get_local_cid(comm)
447+
.ep_range = UCC_COLLECTIVE_EP_RANGE_CONTIG
448448
};
449-
UCC_VERBOSE(2, "creating ucc_team for comm %p, comm_id %llu, comm_size %d",
450-
(void*)comm, (long long unsigned)team_params.id,
451-
ompi_comm_size(comm));
449+
if (OMPI_COMM_IS_GLOBAL_INDEX(comm)) {
450+
team_params.mask |= UCC_TEAM_PARAM_FIELD_ID;
451+
team_params.id = ompi_comm_get_local_cid(comm);
452+
UCC_VERBOSE(2, "creating ucc_team for comm %p, comm_id %llu, comm_size %d",
453+
(void*)comm, (long long unsigned)team_params.id,
454+
ompi_comm_size(comm));
455+
} else {
456+
UCC_VERBOSE(2, "creating ucc_team for comm %p, comm_id not provided, comm_size %d",
457+
(void*)comm, ompi_comm_size(comm));
458+
}
452459

453460
if (UCC_OK != ucc_team_create_post(&cm->ucc_context, 1,
454461
&team_params, &ucc_module->ucc_team)) {
@@ -555,7 +562,7 @@ mca_coll_ucc_comm_query(struct ompi_communicator_t *comm, int *priority)
555562
}
556563

557564
if (!cm->libucc_initialized) {
558-
if (OMPI_SUCCESS != mca_coll_ucc_init_ctx()) {
565+
if (OMPI_SUCCESS != mca_coll_ucc_init_ctx(comm)) {
559566
cm->ucc_enable = 0;
560567
return NULL;
561568
}

0 commit comments

Comments
 (0)