Skip to content

Commit d0ca5d0

Browse files
committed
Add support for detecting when dynamic add_procs is not possible
This commit adds support to the pml, mtl, and btl frameworks for components to indicate at runtime that they do not support the new dynamic add_procs behavior. At the high end the lack of dynamic add_procs support is signalled by the pml using the new pml_flags member of the pml module structure. If the MCA_PML_BASE_FLAG_REQUIRE_WORLD flag is set, MPI_Init will generate the ompi_proc_t array passed to add_procs from ompi_proc_world () instead of ompi_proc_get_allocated (). Both cm and ob1 have been updated to detect if the underlying mtl and btl components support dynamic add_procs. Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from open-mpi/ompi@54a4061)
1 parent f3d4bc1 commit d0ca5d0

File tree

7 files changed

+65
-11
lines changed

7 files changed

+65
-11
lines changed

ompi/mca/mtl/mtl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ struct mca_mtl_request_t {
5656
};
5757
typedef struct mca_mtl_request_t mca_mtl_request_t;
5858

59+
60+
/**
61+
* MTL module flags
62+
*/
63+
#define MCA_MTL_BASE_FLAG_REQUIRE_WORLD 0x00000001
64+
5965
/**
6066
* Initialization routine for MTL component
6167
*

ompi/mca/mtl/portals4/mtl_portals4_component.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ ompi_mtl_portals4_component_open(void)
232232
return OMPI_SUCCESS;
233233
}
234234

235+
#define NEED_ALL_PROCS (ompi_mtl_portals4.use_logical || ompi_mtl_portals4.use_flowctl)
236+
235237
static int
236238
ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority)
237239
{
@@ -241,6 +243,13 @@ ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority)
241243

242244
*priority = param_priority;
243245
*module = (mca_base_module_t *)&ompi_mtl_portals4.base;
246+
247+
if (NEED_ALL_PROCS) {
248+
/* let the pml know we need add_procs to be called with all the
249+
* procs in the job */
250+
ompi_mtl_portals4.base.mtl_flags |= MCA_MTL_BASE_FLAG_REQUIRE_WORLD;
251+
}
252+
244253
return OMPI_SUCCESS;
245254
}
246255

ompi/mca/pml/cm/pml_cm_component.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ mca_pml_cm_component_init(int* priority,
169169
}
170170

171171

172+
if (ompi_mtl->mtl_flags & MCA_MTL_BASE_FLAG_REQUIRE_WORLD) {
173+
ompi_pml_cm.super.pml_flags |= MCA_PML_BASE_FLAG_REQUIRE_WORLD;
174+
}
175+
172176
/* update our tag / context id max values based on MTL
173177
information */
174178
ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid;

ompi/mca/pml/ob1/pml_ob1_component.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "opal/mca/allocator/base/base.h"
3939
#include "opal/mca/base/mca_base_pvar.h"
4040
#include "opal/runtime/opal_params.h"
41+
#include "opal/mca/btl/base/base.h"
4142

4243
OBJ_CLASS_INSTANCE( mca_pml_ob1_pckt_pending_t,
4344
opal_free_list_item_t,
@@ -278,6 +279,17 @@ mca_pml_ob1_component_init( int* priority,
278279
return NULL;
279280
}
280281

282+
/* check if any btls do not support dynamic add_procs */
283+
mca_btl_base_selected_module_t* selected_btl;
284+
OPAL_LIST_FOREACH(selected_btl, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
285+
mca_btl_base_module_t *btl = selected_btl->btl_module;
286+
287+
if (btl->btl_flags & MCA_BTL_FLAGS_SINGLE_ADD_PROCS) {
288+
mca_pml_ob1.super.pml_flags |= MCA_PML_BASE_FLAG_REQUIRE_WORLD;
289+
break;
290+
}
291+
}
292+
281293
/* Set this here (vs in component_open()) because
282294
opal_leave_pinned* may have been set after MCA params were
283295
read (e.g., by the openib btl) */

ompi/mca/pml/pml.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ struct ompi_proc_t;
9393
* indicates whether multiple threads may invoke this component
9494
* simultaneously or not.
9595
*/
96-
typedef struct mca_pml_base_module_1_0_0_t * (*mca_pml_base_component_init_fn_t)(
96+
typedef struct mca_pml_base_module_1_0_1_t * (*mca_pml_base_component_init_fn_t)(
9797
int *priority,
9898
bool enable_progress_threads,
9999
bool enable_mpi_threads);
@@ -479,13 +479,18 @@ typedef int (*mca_pml_base_module_dump_fn_t)(
479479
*/
480480
typedef int (*mca_pml_base_module_ft_event_fn_t) (int status);
481481

482-
482+
/**
483+
* pml module flags
484+
*/
485+
/** PML requires all procs in the job on the first call to
486+
* add_procs */
487+
#define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001
483488

484489
/**
485490
* PML instance.
486491
*/
487492

488-
struct mca_pml_base_module_1_0_0_t {
493+
struct mca_pml_base_module_1_0_1_t {
489494

490495
/* downcalls from MCA to PML */
491496
mca_pml_base_module_add_procs_fn_t pml_add_procs;
@@ -519,9 +524,10 @@ struct mca_pml_base_module_1_0_0_t {
519524
/* maximum constant sizes */
520525
uint32_t pml_max_contextid;
521526
int pml_max_tag;
527+
int pml_flags;
522528
};
523-
typedef struct mca_pml_base_module_1_0_0_t mca_pml_base_module_1_0_0_t;
524-
typedef mca_pml_base_module_1_0_0_t mca_pml_base_module_t;
529+
typedef struct mca_pml_base_module_1_0_1_t mca_pml_base_module_1_0_1_t;
530+
typedef mca_pml_base_module_1_0_1_t mca_pml_base_module_t;
525531

526532
/*
527533
* Macro for use in components that are of type pml
@@ -546,6 +552,10 @@ typedef mca_pml_base_module_1_0_0_t mca_pml_base_module_t;
546552

547553
OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml;
548554

555+
static inline bool mca_pml_base_requires_world (void)
556+
{
557+
return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_REQUIRE_WORLD);
558+
}
549559

550560
END_C_DECLS
551561
#endif /* MCA_PML_H */

ompi/runtime/ompi_mpi_init.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -742,11 +742,21 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
742742
goto error;
743743
}
744744

745-
/* add all allocated ompi_proc_t's to PML (below the add_procs limit this
746-
* behaves identically to ompi_proc_world ()) */
747-
if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) {
748-
error = "ompi_proc_get_allocated () failed";
749-
goto error;
745+
/* some btls/mtls require we call add_procs with all procs in the job.
746+
* since the btls/mtls have no visibility here it is up to the pml to
747+
* convey this requirement */
748+
if (mca_pml_base_requires_world ()) {
749+
if (NULL == (procs = ompi_proc_world (&nprocs))) {
750+
error = "ompi_proc_get_allocated () failed";
751+
goto error;
752+
}
753+
} else {
754+
/* add all allocated ompi_proc_t's to PML (below the add_procs limit this
755+
* behaves identically to ompi_proc_world ()) */
756+
if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) {
757+
error = "ompi_proc_get_allocated () failed";
758+
goto error;
759+
}
750760
}
751761
ret = MCA_PML_CALL(add_procs(procs, nprocs));
752762
free(procs);

opal/mca/btl/btl.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,12 +231,15 @@ typedef uint8_t mca_btl_base_tag_t;
231231
*/
232232
#define MCA_BTL_FLAGS_SIGNALED 0x4000
233233

234-
235234
/** The BTL supports network atomic operations */
236235
#define MCA_BTL_FLAGS_ATOMIC_OPS 0x08000
237236
/** The BTL supports fetching network atomic operations */
238237
#define MCA_BTL_FLAGS_ATOMIC_FOPS 0x10000
239238

239+
/** The BTL requires add_procs to be called with all procs including non-local. Shared-memory
240+
* BTLs should not set this flag. */
241+
#define MCA_BTL_FLAGS_SINGLE_ADD_PROCS 0x20000
242+
240243
/* Default exclusivity levels */
241244
#define MCA_BTL_EXCLUSIVITY_HIGH (64*1024) /* internal loopback */
242245
#define MCA_BTL_EXCLUSIVITY_DEFAULT 1024 /* GM/IB/etc. */

0 commit comments

Comments
 (0)