Skip to content

Commit 010f176

Browse files
committed
PR feedback
Signed-off-by: Howard Pritchard <[email protected]>
1 parent 0ce9cae commit 010f176

File tree

7 files changed

+59
-195
lines changed

7 files changed

+59
-195
lines changed

ompi/communicator/comm_cid.c

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,7 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c
952952
ompi_comm_cid_context_t *context;
953953
ompi_comm_request_t *request;
954954
ompi_request_t *subreq;
955+
uint32_t comm_size;
955956
int ret = 0;
956957

957958
/* the caller should not pass NULL for comm (it may be the same as *newcomm) */
@@ -973,6 +974,25 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c
973974

974975
request->context = &context->super;
975976

977+
/* Prep communicator for handling remote cids if needed */
978+
979+
if (!OMPI_COMM_IS_GLOBAL_INDEX(*newcomm)) {
980+
if (OMPI_COMM_IS_INTER(*newcomm)) {
981+
comm_size = ompi_comm_remote_size(*newcomm);
982+
} else {
983+
comm_size = ompi_comm_size(*newcomm);
984+
}
985+
986+
(*newcomm)->c_index_vec = (uint32_t *)calloc(comm_size, sizeof(uint32_t));
987+
if (NULL == (*newcomm)->c_index_vec) {
988+
return OMPI_ERR_OUT_OF_RESOURCE;
989+
}
990+
991+
if (OMPI_COMM_IS_INTRA(*newcomm)) {
992+
(*newcomm)->c_index_vec[(*newcomm)->c_my_rank] = (*newcomm)->c_index;
993+
}
994+
}
995+
976996
if (MPI_UNDEFINED != (*newcomm)->c_local_group->grp_my_rank) {
977997
/* Initialize the PML stuff in the newcomm */
978998
if ( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(*newcomm))) ) {
@@ -1041,9 +1061,16 @@ int ompi_comm_get_remote_cid (ompi_communicator_t *comm, int dest, uint32_t *rem
10411061

10421062
assert(NULL != remote_cid);
10431063

1044-
if (OMPI_COMM_IS_GLOBAL_INDEX(comm)) {
1064+
if (OPAL_LIKELY(OMPI_COMM_IS_GLOBAL_INDEX(comm))) {
1065+
10451066
*remote_cid = comm->c_index;
1067+
1068+
} else if (0 != comm->c_index_vec[dest]) {
1069+
1070+
*remote_cid = comm->c_index_vec[dest];
1071+
10461072
} else {
1073+
10471074
ompi_proc = ompi_comm_peer_lookup(comm, dest);
10481075
OPAL_PMIX_CONVERT_NAME(&pmix_proc, &ompi_proc->super.proc_name);
10491076

@@ -1075,8 +1102,10 @@ int ompi_comm_get_remote_cid (ompi_communicator_t *comm, int dest, uint32_t *rem
10751102
PMIX_VALUE_GET_NUMBER(rc, val, remote_cid64, size_t);
10761103
rc = OMPI_SUCCESS;
10771104
*remote_cid = (uint32_t)remote_cid64;
1105+
comm->c_index_vec[dest] = (uint32_t)remote_cid64;
10781106
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get PMIX_GROUP_LOCAL_CID %d for cid_base %ld", *remote_cid, excid.cid_base));
10791107
}
1108+
10801109
}
10811110

10821111
done:

ompi/communicator/comm_init.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ static void ompi_comm_construct(ompi_communicator_t* comm)
432432
comm->c_coll = NULL;
433433
comm->c_nbc_tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE;
434434
comm->instance = NULL;
435+
comm->c_index_vec = NULL;
435436

436437
/*
437438
* magic numerology - see TOPDIR/ompi/include/mpif-values.pl
@@ -533,6 +534,11 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
533534
comm->c_name = NULL;
534535
}
535536

537+
if (NULL != comm->c_index_vec) {
538+
free (comm->c_index_vec);
539+
comm->c_index_vec = NULL;
540+
}
541+
536542
#if OPAL_ENABLE_FT_MPI
537543
if( NULL != comm->agreement_specific ) {
538544
OBJ_RELEASE( comm->agreement_specific );

ompi/communicator/communicator.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,10 @@ struct ompi_communicator_t {
286286
uint32_t c_epoch; /* Identifier used to differentiate between two communicators
287287
using the same c_contextid (not at the same time, obviously) */
288288
#endif
289+
/* vector used to store remote cid values for communicators not using
290+
* a global cid, i.e. when OMPI_COMM_IS_GLOBAL_INDEX(comm) returns 0.
291+
*/
292+
uint32_t *c_index_vec;
289293
/* Non-blocking collective tag. These tags might be shared between
290294
* all non-blocking collective modules (to avoid message collision
291295
* between them in the case where multiple outstanding non-blocking

ompi/mca/mtl/ofi/mtl_ofi.c

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
3-
* Copyright (c) 2021-2022 Triad National Security, LLC. All rights
3+
* Copyright (c) 2021-2024 Triad National Security, LLC. All rights
44
* reserved.
55
*
66
* $COPYRIGHT$
@@ -14,8 +14,6 @@
1414

1515
OMPI_DECLSPEC extern mca_mtl_ofi_component_t mca_mtl_ofi_component;
1616

17-
OBJ_CLASS_INSTANCE(mca_mtl_comm_t, opal_object_t, NULL, NULL);
18-
1917
mca_mtl_ofi_module_t ompi_mtl_ofi = {
2018
{
2119
(int)((1ULL << MTL_OFI_CID_BIT_COUNT_1) - 1), /* max cid */
@@ -346,38 +344,10 @@ int ompi_mtl_ofi_add_comm(struct mca_mtl_base_module_t *mtl,
346344
struct ompi_communicator_t *comm)
347345
{
348346
int ret = OMPI_SUCCESS;
349-
uint32_t comm_size;
350-
mca_mtl_comm_t* mtl_comm;
351347

352348
mca_mtl_ofi_ep_type ep_type = (0 == ompi_mtl_ofi.enable_sep) ?
353349
OFI_REGULAR_EP : OFI_SCALABLE_EP;
354350

355-
if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) {
356-
mtl_comm = OBJ_NEW(mca_mtl_comm_t);
357-
358-
if (OMPI_COMM_IS_INTER(comm)) {
359-
comm_size = ompi_comm_remote_size(comm);
360-
} else {
361-
comm_size = ompi_comm_size(comm);
362-
}
363-
mtl_comm->c_index_vec = (c_index_vec_t *)calloc(comm_size, sizeof(c_index_vec_t));
364-
if (NULL == mtl_comm->c_index_vec) {
365-
ret = OMPI_ERR_OUT_OF_RESOURCE;
366-
OBJ_RELEASE(mtl_comm);
367-
goto error;
368-
}
369-
if (OMPI_COMM_IS_INTRA(comm)) {
370-
mtl_comm->c_index_vec[comm->c_my_rank].c_index = comm->c_index;
371-
}
372-
373-
comm->c_mtl_comm = mtl_comm;
374-
375-
} else {
376-
377-
comm->c_mtl_comm = NULL;
378-
379-
}
380-
381351
/*
382352
* If thread grouping enabled, add new OFI context for each communicator
383353
* other than MPI_COMM_SELF.
@@ -407,12 +377,6 @@ int ompi_mtl_ofi_del_comm(struct mca_mtl_base_module_t *mtl,
407377
mca_mtl_ofi_ep_type ep_type = (0 == ompi_mtl_ofi.enable_sep) ?
408378
OFI_REGULAR_EP : OFI_SCALABLE_EP;
409379

410-
if(NULL != comm->c_mtl_comm) {
411-
free(comm->c_mtl_comm->c_index_vec);
412-
OBJ_RELEASE(comm->c_mtl_comm);
413-
comm->c_mtl_comm = NULL;
414-
}
415-
416380
/*
417381
* Clean up OFI contexts information.
418382
*/

ompi/mca/mtl/ofi/mtl_ofi.h

Lines changed: 8 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -76,31 +76,6 @@ int ompi_mtl_ofi_progress_no_inline(void);
7676
extern opal_thread_local int ompi_mtl_ofi_per_thread_ctx;
7777
#endif
7878

79-
#define MCA_MTL_OFI_CID_NOT_EXCHANGED 2
80-
#define MCA_MTL_OFI_CID_EXCHANGING 1
81-
#define MCA_MTL_OFI_CID_EXCHANGED 0
82-
83-
typedef struct {
84-
uint32_t c_index;
85-
} c_index_vec_t;
86-
87-
typedef struct mca_mtl_comm_t {
88-
opal_object_t super;
89-
c_index_vec_t *c_index_vec;
90-
} mca_mtl_comm_t;
91-
92-
OBJ_CLASS_DECLARATION(mca_mtl_comm_t);
93-
94-
struct mca_mtl_ofi_cid_hdr_t {
95-
ompi_comm_extended_cid_t hdr_cid;
96-
int16_t hdr_src_c_index;
97-
int32_t hdr_src;
98-
bool need_response;
99-
bool ofi_cq_data;
100-
};
101-
102-
typedef struct mca_mtl_ofi_cid_hdr_t mca_mtl_ofi_cid_hdr_t;
103-
10479
/* Set OFI context for operations which generate completion events */
10580
__opal_attribute_always_inline__ static inline void
10681
set_thread_context(int ctxt)
@@ -572,7 +547,8 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl,
572547
{
573548
ssize_t ret = OMPI_SUCCESS;
574549
ompi_mtl_ofi_request_t ofi_req;
575-
int ompi_ret, ctxt_id = 0, c_index_for_tag;
550+
int ompi_ret, ctxt_id = 0;
551+
uint32_t c_index_for_tag;
576552
void *start;
577553
bool free_after;
578554
size_t length;
@@ -582,25 +558,10 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl,
582558
ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
583559
fi_addr_t src_addr = 0;
584560
fi_addr_t sep_peer_fiaddr = 0;
585-
mca_mtl_comm_t *mtl_comm;
586-
uint32_t remote_cid;
587561

588-
if (OPAL_LIKELY(OMPI_COMM_IS_GLOBAL_INDEX(comm))) {
589-
c_index_for_tag = comm->c_index;
590-
} else {
591-
mtl_comm = comm->c_mtl_comm;
592-
/*
593-
* if we do not have the local cid for the target receiver,
594-
* retrieve via the PMIx modex
595-
*/
596-
if (mtl_comm->c_index_vec[dest].c_index == 0) {
597-
ompi_ret = ompi_comm_get_remote_cid(comm, dest, &remote_cid);
598-
if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) {
599-
return ompi_ret;
600-
}
601-
mtl_comm->c_index_vec[dest].c_index = remote_cid;
602-
}
603-
c_index_for_tag = mtl_comm->c_index_vec[dest].c_index;
562+
ompi_ret = ompi_comm_get_remote_cid(comm, dest, &c_index_for_tag);
563+
if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) {
564+
return ompi_ret;
604565
}
605566

606567
ompi_mtl_ofi_set_mr_null(&ofi_req);
@@ -828,7 +789,8 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl,
828789
{
829790
ssize_t ret = OMPI_SUCCESS;
830791
ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t *) mtl_request;
831-
int ompi_ret, ctxt_id = 0, c_index_for_tag;
792+
int ompi_ret, ctxt_id = 0;
793+
uint32_t c_index_for_tag;
832794
void *start;
833795
size_t length;
834796
bool free_after;
@@ -837,28 +799,10 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl,
837799
mca_mtl_ofi_endpoint_t *endpoint = NULL;
838800
ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
839801
fi_addr_t sep_peer_fiaddr = 0;
840-
mca_mtl_comm_t *mtl_comm;
841-
uint32_t remote_cid;
842802

843803
ompi_mtl_ofi_set_mr_null(ofi_req);
844804

845-
if (OMPI_COMM_IS_GLOBAL_INDEX(comm)) {
846-
c_index_for_tag = comm->c_index;
847-
} else {
848-
mtl_comm = comm->c_mtl_comm;
849-
/*
850-
* if we do not have the local cid for the target receiver,
851-
* retrive via the PMIx modex
852-
*/
853-
if (mtl_comm->c_index_vec[dest].c_index == 0) {
854-
ompi_ret = ompi_comm_get_remote_cid(comm, dest, &remote_cid);
855-
if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) {
856-
return ompi_ret;
857-
}
858-
mtl_comm->c_index_vec[dest].c_index = remote_cid;
859-
}
860-
c_index_for_tag = mtl_comm->c_index_vec[dest].c_index;
861-
}
805+
ompi_ret = ompi_comm_get_remote_cid(comm, dest, &c_index_for_tag);
862806

863807
if (ompi_mtl_ofi.total_ctxts_used > 0) {
864808
ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used;

0 commit comments

Comments
 (0)