Skip to content

Commit bc634db

Browse files
committed
Make sure the gather is called in all cases, and not simply based on some local state.
This is the second part of the patch proposed for #1183.
Signed-off-by: George Bosilca <[email protected]>
1 parent 2216b80 commit bc634db

File tree

3 files changed

+67
-84
lines changed

3 files changed

+67
-84
lines changed

ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c

Lines changed: 64 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
171171
OPAL_MODEX_RECV_VALUE(err, OPAL_PMIX_NODEID, &(proc->super.proc_name), &pval, OPAL_UINT32);
172172
if( OPAL_SUCCESS != err ) {
173173
opal_output(0, "Unable to extract peer %s nodeid from the modex.\n",
174-
OMPI_NAME_PRINT(&(proc->super.proc_name)));
175-
vpids[i] = colors[i] = -1;
174+
OMPI_NAME_PRINT(&(proc->super)));
175+
colors[i] = -1;
176176
continue;
177177
}
178178
vpids[i] = colors[i] = (int)val;
@@ -396,38 +396,37 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
396396
* If weights have been provided take them in account. Otherwise rely
397397
* solely on HWLOC information.
398398
*/
399-
if(0 == rank) {
399+
if( 0 == rank ) {
400400

401+
#ifdef __DEBUG__
401402
fprintf(stderr,"========== Centralized Reordering ========= \n");
402-
403+
#endif
403404
local_pattern = (double *)calloc(size*size,sizeof(double));
404-
if( true == topo->weighted ) {
405-
for(i = 0; i < topo->indegree ; i++)
406-
local_pattern[topo->in[i]] += topo->inw[i];
407-
for(i = 0; i < topo->outdegree ; i++)
408-
local_pattern[topo->out[i]] += topo->outw[i];
409-
if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_gather(MPI_IN_PLACE, size, MPI_DOUBLE,
410-
local_pattern, size, MPI_DOUBLE,
411-
0, comm_old,
412-
comm_old->c_coll->coll_gather_module)))
413-
return err;
414-
}
415405
} else {
416406
local_pattern = (double *)calloc(size,sizeof(double));
417-
if( true == topo->weighted ) {
418-
for(i = 0; i < topo->indegree ; i++)
419-
local_pattern[topo->in[i]] += topo->inw[i];
420-
for(i = 0; i < topo->outdegree ; i++)
421-
local_pattern[topo->out[i]] += topo->outw[i];
422-
if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_gather(local_pattern, size, MPI_DOUBLE,
423-
NULL,0,0,
424-
0, comm_old,
425-
comm_old->c_coll->coll_gather_module)))
426-
return err;
427-
}
407+
}
408+
if( true == topo->weighted ) {
409+
for(i = 0; i < topo->indegree ; i++)
410+
local_pattern[topo->in[i]] += topo->inw[i];
411+
for(i = 0; i < topo->outdegree ; i++)
412+
local_pattern[topo->out[i]] += topo->outw[i];
413+
}
414+
if(0 == rank) {
415+
err = comm_old->c_coll->coll_gather(MPI_IN_PLACE, size, MPI_DOUBLE,
416+
local_pattern, size, MPI_DOUBLE,
417+
0, comm_old,
418+
comm_old->c_coll->coll_gather_module);
419+
} else {
420+
err = comm_old->c_coll->coll_gather(local_pattern, size, MPI_DOUBLE,
421+
NULL,0,0,
422+
0, comm_old,
423+
comm_old->c_coll->coll_gather_module);
424+
}
425+
if (OMPI_SUCCESS != err) {
426+
return err;
428427
}
429428

430-
if( rank == local_procs[0]) {
429+
if( rank == local_procs[0] ) {
431430
tm_topology_t *tm_topology = NULL;
432431
tm_topology_t *tm_opt_topology = NULL;
433432
int *obj_to_rank_in_comm = NULL;
@@ -708,7 +707,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
708707
char set_as_string[64];
709708
opal_value_t kv;
710709

711-
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old,colors[rank],ompi_process_info.my_local_rank,&localcomm, false)))
710+
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, colors[rank],
711+
ompi_process_info.my_local_rank,
712+
&localcomm, false)))
712713
return err;
713714

714715
for(i = 0 ; i < num_procs_in_node ; i++)
@@ -718,64 +719,64 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
718719
for(i = 0 ; i < size ; i++)
719720
grank_to_lrank[i] = -1;
720721

721-
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_allgather(&rank,1,MPI_INT,
722-
lrank_to_grank,1,MPI_INT,
723-
localcomm,
724-
localcomm->c_coll->coll_allgather_module)))
722+
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_allgather(&rank, 1, MPI_INT,
723+
lrank_to_grank, 1, MPI_INT,
724+
localcomm,
725+
localcomm->c_coll->coll_allgather_module)))
725726
return err;
726727

727728
for(i = 0 ; i < num_procs_in_node ; i++)
728729
grank_to_lrank[lrank_to_grank[i]] = i;
729730

730-
if (rank == local_procs[0]){
731-
tm_topology_t *tm_topology = NULL;
732-
tm_topology_t *tm_opt_topology = NULL;
733-
tree_t *comm_tree = NULL;
734-
double **comm_pattern = NULL;
735-
731+
/* Discover the local patterns */
732+
if (rank == local_procs[0]) {
736733
#ifdef __DEBUG__
737734
fprintf(stderr,"========== Partially Distributed Reordering ========= \n");
738735
#endif
736+
local_pattern = (double *)calloc(num_procs_in_node * num_procs_in_node,sizeof(double));
737+
} else {
738+
local_pattern = (double *)calloc(num_procs_in_node,sizeof(double));
739+
}
740+
for(i = 0; i < topo->indegree ; i++)
741+
if (grank_to_lrank[topo->in[i]] != -1)
742+
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
743+
for(i = 0; i < topo->outdegree ; i++)
744+
if (grank_to_lrank[topo->out[i]] != -1)
745+
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
746+
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_gather((rank == local_procs[0] ? MPI_IN_PLACE : local_pattern),
747+
num_procs_in_node, MPI_DOUBLE,
748+
local_pattern, num_procs_in_node, MPI_DOUBLE,
749+
0, localcomm,
750+
localcomm->c_coll->coll_gather_module)))
751+
ERR_EXIT(err);
739752

740-
local_pattern = (double *)calloc(num_procs_in_node*num_procs_in_node,sizeof(double));
741-
for(i = 0 ; i < num_procs_in_node*num_procs_in_node ; i++)
742-
local_pattern[i] = 0.0;
743-
744-
if( true == topo->weighted ) {
745-
for(i = 0; i < topo->indegree ; i++)
746-
if (grank_to_lrank[topo->in[i]] != -1)
747-
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
748-
for(i = 0; i < topo->outdegree ; i++)
749-
if (grank_to_lrank[topo->out[i]] != -1)
750-
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
751-
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_gather(MPI_IN_PLACE, num_procs_in_node, MPI_DOUBLE,
752-
local_pattern, num_procs_in_node, MPI_DOUBLE,
753-
0,localcomm,
754-
localcomm->c_coll->coll_gather_module)))
755-
ERR_EXIT(err);
756-
}
753+
/* The root has now the entire information, so let's crunch it */
754+
if (rank == local_procs[0]) {
755+
tm_topology_t *tm_topology = NULL;
756+
tm_topology_t *tm_opt_topology = NULL;
757+
tree_t *comm_tree = NULL;
758+
double **comm_pattern = NULL;
757759

758760
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
759761
for(i = 0 ; i < num_procs_in_node ; i++){
760-
comm_pattern[i] = (double *)calloc(num_procs_in_node,sizeof(double));
761-
memcpy((void *)comm_pattern[i],(void *)(local_pattern + i*num_procs_in_node),num_procs_in_node*sizeof(double));
762+
comm_pattern[i] = (double *)calloc(num_procs_in_node, sizeof(double));
763+
memcpy((void *)comm_pattern[i],
764+
(void *)(local_pattern + i*num_procs_in_node),
765+
num_procs_in_node*sizeof(double));
762766
}
763767
/* Matrix needs to be symmetric */
764768
for( i = 0 ; i < num_procs_in_node ; i++)
765769
for(j = i ; j < num_procs_in_node ; j++){
766-
comm_pattern[i][j] += comm_pattern[j][i];
767-
comm_pattern[j][i] = comm_pattern[i][j];
770+
comm_pattern[i][j] = (comm_pattern[i][j] + comm_pattern[j][i]) / 2;
771+
comm_pattern[j][i] = comm_pattern[i][j];
768772
}
769-
for( i = 0 ; i < num_procs_in_node ; i++)
770-
for(j = 0 ; j < num_procs_in_node ; j++)
771-
comm_pattern[i][j] /= 2;
772773

773774
#ifdef __DEBUG__
774775
fprintf(stdout,"========== COMM PATTERN ============= \n");
775776
for(i = 0 ; i < num_procs_in_node ; i++){
776777
fprintf(stdout," %i : ",i);
777778
for(j = 0; j < num_procs_in_node ; j++)
778-
fprintf(stdout," %f ",comm_pattern[i][j]);
779+
fprintf(stdout," %f ", comm_pattern[i][j]);
779780
fprintf(stdout,"\n");
780781
}
781782
fprintf(stdout,"======================= \n");
@@ -830,24 +831,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
830831
free(tm_topology->arity);
831832
free(tm_topology);
832833
FREE_topology(tm_opt_topology);
833-
} else {
834-
local_pattern = (double *)calloc(num_procs_in_node,sizeof(double));
835-
for(i = 0 ; i < num_procs_in_node ; i++)
836-
local_pattern[i] = 0.0;
837-
838-
if( true == topo->weighted ) {
839-
for(i = 0; i < topo->indegree ; i++)
840-
if (grank_to_lrank[topo->in[i]] != -1)
841-
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
842-
for(i = 0; i < topo->outdegree ; i++)
843-
if (grank_to_lrank[topo->out[i]] != -1)
844-
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
845-
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_gather(local_pattern, num_procs_in_node, MPI_DOUBLE,
846-
NULL,0,0,
847-
0,localcomm,
848-
localcomm->c_coll->coll_gather_module)))
849-
ERR_EXIT(err);
850-
}
851834
}
852835

853836
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_bcast(matching, num_procs_in_node,

ompi/mca/topo/treematch/treematch/tm_kpartitioning.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,7 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
427427
verbose_level = get_verbose_level();
428428

429429
if(verbose_level>=INFO)
430-
printf("Number of constraints: %d\n", nb_constraints);
431-
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);
430+
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);
432431

433432
nb_cores=nb_processing_units(topology);
434433

ompi/mca/topo/treematch/treematch/tm_tree.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1614,7 +1614,8 @@ tree_t * build_tree_from_topology(tm_topology_t *topology, double **com_mat, int
16141614

16151615
nb_constraints = check_constraints (topology, &constraints);
16161616

1617-
printf("nb_constraints = %d, N= %d; nb_processing units = %d\n",nb_constraints, N, nb_processing_units(topology));
1617+
if(verbose_level>=INFO)
1618+
printf("nb_constraints = %d, N= %d; nb_processing units = %d\n",nb_constraints, N, nb_processing_units(topology));
16181619

16191620
if(N>nb_constraints){
16201621
if(verbose_level >= CRITICAL){

0 commit comments

Comments
 (0)