Skip to content

Commit 349cb75

Browse files
committed
comm_split_type: fix MPI_UNDEFINED handling for HW_GUIDED
Signed-off-by: Joshua Hursey <[email protected]>
1 parent 2306f09 commit 349cb75

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

ompi/communicator/comm.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,7 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
868868
ompi_communicator_t *newcomp = MPI_COMM_NULL;
869869
int my_size, my_rsize = 0, mode, inter;
870870
int *lranks = NULL, *rranks = NULL;
871-
int global_split_type, global_orig_split_type, ok, tmp[6];
871+
int global_split_type, global_orig_split_type, ok[2], tmp[6];
872872
int rc;
873873
int orig_split_type = split_type;
874874

@@ -941,26 +941,34 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
941941
global_orig_split_type = tmp[0];
942942
global_split_type = tmp[4];
943943

944-
if (tmp[0] != -tmp[1] || inter) {
944+
if (tmp[0] != -tmp[1] || tmp[4] != -tmp[5] || inter) {
945945
/* at least one rank supplied a different split type; check if our split_type is ok */
946-
ok = (MPI_UNDEFINED == orig_split_type) || global_orig_split_type == orig_split_type;
946+
ok[0] = (MPI_UNDEFINED == orig_split_type) || global_orig_split_type == orig_split_type;
947+
ok[1] = (MPI_UNDEFINED == orig_split_type) || global_split_type == split_type;
947948

948-
rc = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &ok, 1, MPI_INT, MPI_MIN, comm,
949+
rc = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &ok, 2, MPI_INT, MPI_MIN, comm,
949950
comm->c_coll->coll_allreduce_module);
950951
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
951952
return rc;
952953
}
953954

954955
if (inter) {
955956
/* need an extra allreduce to ensure that all ranks have the same result */
956-
rc = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &ok, 1, MPI_INT, MPI_MIN, comm,
957+
rc = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &ok, 2, MPI_INT, MPI_MIN, comm,
957958
comm->c_coll->coll_allreduce_module);
958959
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
959960
return rc;
960961
}
961962
}
962963

963-
if (OPAL_UNLIKELY(!ok)) {
964+
if (OPAL_UNLIKELY(!ok[0] || !ok[1])) {
965+
if (0 == ompi_comm_rank(comm)) {
966+
if (!ok[1]) {
967+
opal_output(0, "Error: Mismatched info values for MPI_COMM_TYPE_HW_GUIDED");
968+
} else {
969+
opal_output(0, "Error: Mismatched info values for split_type");
970+
}
971+
}
964972
return OMPI_ERR_BAD_PARAM;
965973
}
966974

@@ -978,14 +986,6 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
978986
return OMPI_SUCCESS;
979987
}
980988

981-
/* MPI_COMM_TYPE_HW_GUIDED: Check if 'value' the same at all ranks */
982-
if (tmp[4] != -tmp[5]) {
983-
if (0 == ompi_comm_rank(comm)) {
984-
opal_output(0, "Error: Mismatched info values for MPI_COMM_TYPE_HW_GUIDED");
985-
}
986-
return OMPI_ERR_BAD_PARAM;
987-
}
988-
989989
/* TODO: Make this better...
990990
*
991991
* See Example 7.4 in the MPI 4.0 standard for example usage.

0 commit comments

Comments
 (0)