From 9bd775769b2d64286a50c768d3a543312de4aaa5 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 2 Nov 2022 09:51:54 -0400 Subject: [PATCH] tuned: use tree instead of bruck at scale The switch from tree to bruck between 512 and 1023 processes leads to unexpected latency changes in benchmarks of other collectives. We should be consistent here. There is no good reason why bruck would perform better in that range but not beyond. Signed-off-by: Joseph Schuchart --- ompi/mca/coll/tuned/coll_tuned_decision_fixed.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index fa31aef1860..d210ff4412f 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -490,14 +490,8 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm, alg = 3; } else if (communicator_size < 256) { alg = 4; - } else if (communicator_size < 512) { - alg = 6; - } else if (communicator_size < 1024) { - alg = 4; - } else if (communicator_size < 4096) { - alg = 6; } else { - alg = 4; + alg = 6; } return ompi_coll_tuned_barrier_intra_do_this (comm, module,