Skip to content

Commit d6a1df7

Browse files
committed
ch4/ofi: delay setting MPIDI_OFI_global.num_nics
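MPIDI_OFI_global.num_nics affects runtime paths such as OFI progress and large-message striping. NIC discovery now records its result in MPIDI_OFI_global.num_nics_available, and num_nics is set from it only in MPIDI_OFI_init_vcis, so those runtime paths do not run into complications before multi-NIC support is ready.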
1 parent 076c509 commit d6a1df7

File tree

3 files changed

+8
-6
lines changed

3 files changed

+8
-6
lines changed

src/mpid/ch4/netmod/ofi/ofi_nic.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ int MPIDI_OFI_init_multi_nic(struct fi_info *prov)
195195
mpi_errno = setup_single_nic();
196196
MPIR_ERR_CHECK(mpi_errno);
197197
}
198-
MPIR_Assert(MPIDI_OFI_global.num_nics > 0);
198+
MPIR_Assert(MPIDI_OFI_global.num_nics_available > 0);
199199

200200
fn_exit:
201201
return mpi_errno;
@@ -205,7 +205,7 @@ int MPIDI_OFI_init_multi_nic(struct fi_info *prov)
205205

206206
static int setup_single_nic(void)
207207
{
208-
MPIDI_OFI_global.num_nics = 1;
208+
MPIDI_OFI_global.num_nics_available = 1;
209209
MPIDI_OFI_global.nic_info[0].nic = MPIDI_OFI_global.prov_use[0];
210210
MPIDI_OFI_global.nic_info[0].id = 0;
211211
MPIDI_OFI_global.nic_info[0].close = 1;
@@ -217,7 +217,7 @@ static int setup_single_nic(void)
217217
MPIR_Info *info_ptr = NULL;
218218
MPIR_Info_get_ptr(MPI_INFO_ENV, info_ptr);
219219
snprintf(nics_str, 32, "%d", 1);
220-
MPIR_Info_set_impl(info_ptr, "num_nics", nics_str);
220+
MPIR_Info_set_impl(info_ptr, "num_nics_available", nics_str);
221221
snprintf(nics_str, 32, "%d", 1);
222222
MPIR_Info_set_impl(info_ptr, "num_close_nics", nics_str);
223223

@@ -405,14 +405,14 @@ static int setup_multi_nic(int nic_count)
405405
for (int i = num_nics; i < nic_count; i++) {
406406
fi_freeinfo(MPIDI_OFI_global.prov_use[i]);
407407
}
408-
MPIDI_OFI_global.num_nics = MPL_MIN(nic_count, num_nics);
408+
MPIDI_OFI_global.num_nics_available = MPL_MIN(nic_count, num_nics);
409409

410410
/* Set some info keys on MPI_INFO_ENV to reflect the number of available (close) NICs */
411411
char nics_str[32];
412412
MPIR_Info *info_ptr = NULL;
413413
MPIR_Info_get_ptr(MPI_INFO_ENV, info_ptr);
414-
snprintf(nics_str, 32, "%d", MPIDI_OFI_global.num_nics);
415-
MPIR_Info_set_impl(info_ptr, "num_nics", nics_str);
414+
snprintf(nics_str, 32, "%d", MPIDI_OFI_global.num_nics_available);
415+
MPIR_Info_set_impl(info_ptr, "num_nics_available", nics_str);
416416
snprintf(nics_str, 32, "%d", num_close_nics);
417417
MPIR_Info_set_impl(info_ptr, "num_close_nics", nics_str);
418418

src/mpid/ch4/netmod/ofi/ofi_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@ typedef struct MPIDI_GPU_RDMA_queue_t {
460460
typedef struct {
461461
/* OFI objects */
462462
int avtid;
463+
int num_nics_available;
463464
struct fi_info *prov_use[MPIDI_OFI_MAX_NICS];
464465
MPIDI_OFI_nic_info_t nic_info[MPIDI_OFI_MAX_NICS];
465466
struct fid_fabric *fabric;

src/mpid/ch4/netmod/ofi/ofi_vci.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ int MPIDI_OFI_init_vcis(int num_vcis, int *num_vcis_actual)
6767
MPIR_Assert(num_vcis == 1 || MPIDI_OFI_ENABLE_SCALABLE_ENDPOINTS);
6868
#endif
6969

70+
MPIDI_OFI_global.num_nics = MPIDI_OFI_global.num_nics_available;
7071
MPIDI_OFI_global.num_vcis = num_vcis;
7172

7273
/* All processes must have the same number of NICs */

0 commit comments

Comments
 (0)