4646#include "opal/mca/threads/mutex.h"
4747#include "opal/util/bit_ops.h"
4848#include "opal/util/output.h"
49+ #include "opal/util/show_help.h"
4950#include "ompi/mca/topo/topo.h"
5051#include "ompi/mca/topo/base/base.h"
5152#include "ompi/dpm/dpm.h"
@@ -79,6 +80,21 @@ static const ompi_comm_split_type_hw_guided_t ompi_comm_split_type_hw_guided_sup
7980 {.info_value = NULL },
8081};
8182
83+ static const char * ompi_comm_split_type_to_str (int split_type ) {
84+ for (int i = 0 ; NULL != ompi_comm_split_type_hw_guided_support [i ].info_value ; ++ i ) {
85+ if (split_type == ompi_comm_split_type_hw_guided_support [i ].split_type ) {
86+ return ompi_comm_split_type_hw_guided_support [i ].info_value ;
87+ }
88+ }
89+ if (MPI_COMM_TYPE_HW_GUIDED == split_type ) {
90+ return "MPI_COMM_TYPE_HW_GUIDED" ;
91+ }
92+ else if (MPI_COMM_TYPE_HW_UNGUIDED == split_type ) {
93+ return "MPI_COMM_TYPE_HW_UNGUIDED" ;
94+ }
95+ return "Unknown" ;
96+ }
97+
8298/*
8399** sort-function for MPI_Comm_split
84100*/
@@ -793,7 +809,11 @@ static int ompi_comm_split_type_get_part (ompi_group_t *group, const int split_t
793809 * We should not get here as the split type will be changed
794810 * at a higher level.
795811 */
796- opal_output (0 , "Error: in ompi_comm_split_type_get_part() unexpected split_type=%d" , split_type );
812+ opal_show_help ("help-comm.txt" ,
813+ "unexpected-split-type" ,
814+ true,
815+ ompi_comm_split_type_to_str (split_type ),
816+ split_type );
797817 return OMPI_ERR_BAD_PARAM ;
798818 }
799819
@@ -868,9 +888,11 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
868888 ompi_communicator_t * newcomp = MPI_COMM_NULL ;
869889 int my_size , my_rsize = 0 , mode , inter ;
870890 int * lranks = NULL , * rranks = NULL ;
871- int global_split_type , global_orig_split_type , ok , tmp [6 ];
891+ int global_split_type , global_orig_split_type , ok [ 2 ] , tmp [6 ];
872892 int rc ;
873893 int orig_split_type = split_type ;
894+ int flag ;
895+ opal_cstring_t * value = NULL ;
874896
875897 /* silence clang warning. newcomm should never be NULL */
876898 if (OPAL_UNLIKELY (NULL == newcomm )) {
@@ -881,9 +903,6 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
881903
882904 /* Step 0: Convert MPI_COMM_TYPE_HW_GUIDED to the internal type */
883905 if (MPI_COMM_TYPE_HW_GUIDED == split_type ) {
884- int flag ;
885- opal_cstring_t * value = NULL ;
886-
887906 opal_info_get (info , "mpi_hw_resource_type" , & value , & flag );
888907 /* If key is not in the 'info', then return MPI_COMM_NULL.
889908 * This is caught at the MPI interface level, but it doesn't hurt to
@@ -941,26 +960,39 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
941960 global_orig_split_type = tmp [0 ];
942961 global_split_type = tmp [4 ];
943962
944- if (tmp [0 ] != - tmp [1 ] || inter ) {
963+ if (tmp [0 ] != - tmp [1 ] || tmp [ 4 ] != - tmp [ 5 ] || inter ) {
945964 /* at least one rank supplied a different split type check if our split_type is ok */
946- ok = (MPI_UNDEFINED == orig_split_type ) || global_orig_split_type == orig_split_type ;
965+ ok [0 ] = (MPI_UNDEFINED == orig_split_type ) || global_orig_split_type == orig_split_type ;
966+ ok [1 ] = (MPI_UNDEFINED == orig_split_type ) || global_split_type == split_type ;
947967
948- rc = comm -> c_coll -> coll_allreduce (MPI_IN_PLACE , & ok , 1 , MPI_INT , MPI_MIN , comm ,
968+ rc = comm -> c_coll -> coll_allreduce (MPI_IN_PLACE , & ok , 2 , MPI_INT , MPI_MIN , comm ,
949969 comm -> c_coll -> coll_allreduce_module );
950970 if (OPAL_UNLIKELY (OMPI_SUCCESS != rc )) {
951971 return rc ;
952972 }
953973
954974 if (inter ) {
955975 /* need an extra allreduce to ensure that all ranks have the same result */
956- rc = comm -> c_coll -> coll_allreduce (MPI_IN_PLACE , & ok , 1 , MPI_INT , MPI_MIN , comm ,
976+ rc = comm -> c_coll -> coll_allreduce (MPI_IN_PLACE , & ok , 2 , MPI_INT , MPI_MIN , comm ,
957977 comm -> c_coll -> coll_allreduce_module );
958978 if (OPAL_UNLIKELY (OMPI_SUCCESS != rc )) {
959979 return rc ;
960980 }
961981 }
962982
963- if (OPAL_UNLIKELY (!ok )) {
983+ if (OPAL_UNLIKELY (!ok [0 ] || !ok [1 ])) {
984+ if (0 == ompi_comm_rank (comm )) {
985+ opal_info_get (info , "mpi_hw_resource_type" , & value , & flag );
986+ if (!flag ) {
987+ value = NULL ;
988+ }
989+ opal_show_help ("help-comm.txt" ,
990+ "mismatched-split_type-values" ,
991+ true,
992+ ompi_comm_split_type_to_str (orig_split_type ),
993+ orig_split_type ,
994+ NULL == value ? "" : value -> string );
995+ }
964996 return OMPI_ERR_BAD_PARAM ;
965997 }
966998
@@ -978,14 +1010,6 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
9781010 return OMPI_SUCCESS ;
9791011 }
9801012
981- /* MPI_COMM_TYPE_HW_GUIDED: Check if 'value' the same at all ranks */
982- if (tmp [4 ] != - tmp [5 ]) {
983- if (0 == ompi_comm_rank (comm )) {
984- opal_output (0 , "Error: Mismatched info values for MPI_COMM_TYPE_HW_GUIDED" );
985- }
986- return OMPI_ERR_BAD_PARAM ;
987- }
988-
9891013 /* TODO: Make this better...
9901014 *
9911015 * See Example 7.4 in the MPI 4.0 standard for example usage.
0 commit comments