Skip to content

Commit 590f23c

Browse files
authored
Merge pull request #6808 from garlick/3level
broker: add 'mincrit' topology
2 parents f6a9548 + 59043e1 commit 590f23c

File tree

5 files changed

+193
-15
lines changed

5 files changed

+193
-15
lines changed

doc/man7/flux-broker-attributes.rst

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -177,12 +177,30 @@ TREE BASED OVERLAY NETWORK
177177
==========================
178178

179179
tbon.topo [Updates: C]
180-
URI describing the TBON tree topology such as ``kary:16``. The ``kary``
181-
scheme selects a complete, k-ary tree with fanout *k*, with ``kary:0``
182-
meaning that rank 0 is the parent of all other ranks by convention. The
183-
``binomial`` scheme selects a binomial tree topology of the minimum order
184-
that fits the instance size. Default: ``kary:32``, unless bootstrapping by
185-
TOML configuration, then see :man5:`flux-config-bootstrap`.
180+
A URI describing the TBON tree topology. The following schemes are
181+
available:
182+
183+
kary:k
184+
A complete, k-ary tree with fanout *k*. By convention, ``kary:0``
185+
indicates that rank 0 is the parent of all other ranks.
186+
187+
mincrit[:k]
188+
Minimize critical ranks. The tree height is limited to three.
189+
If *k* is specified, it sets the number of router (interior) ranks,
190+
making the number of critical ranks *k* plus one. The fanout from
191+
routers to leaves is determined by the instance size. If *k* is
192+
unspecified, *k* is set to a value that avoids exceeding a fanout of
193+
1024 at any level. If that cannot be achieved in three levels,
194+
then rank 0 is overloaded.
195+
196+
binomial
197+
Binomial tree topology of the minimum order that fits the instance size.
198+
199+
custom
200+
The topology is set by TOML configuration.
201+
See :man5:`flux-config-bootstrap`.
202+
203+
The default value is ``kary:32``.
186204

187205
tbon.descendants
188206
Number of descendants "below" this node of the tree based

doc/test/spell.en.pws

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -969,3 +969,4 @@ YQ
969969
composable
970970
orchestrator
971971
fsck
972+
mincrit

src/broker/test/topology.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,24 @@ struct internal_ranks_test internal_ranks_tests[] = {
250250
{ 4, "binomial","0,2" },
251251
{ 8, "binomial","0,2,4,6" },
252252
{ 16, "binomial","0,2,4,6,8,10,12,14" },
253+
{ 4096, "mincrit:0", "0" },
254+
{ 1, "mincrit:2", "" },
255+
{ 2, "mincrit:2", "0" },
256+
{ 3, "mincrit:2", "0" },
257+
{ 4, "mincrit:2", "0-1" },
258+
{ 5, "mincrit:2", "0-2" },
259+
{ 6, "mincrit:2", "0-2" },
260+
{ 7, "mincrit:2", "0-2" },
261+
{ 1024, "mincrit:2", "0-2" },
262+
{ 512, "mincrit", "0" },
263+
// leader has 1024 children at size=1024+1=1025
264+
{ 1025, "mincrit", "0" },
265+
{ 1026, "mincrit", "0-2" },
266+
// 2 routers have 1024 children each at size=1024+1024+3=2051
267+
{ 2051, "mincrit", "0-2" },
268+
{ 2052, "mincrit", "0-3" },
269+
// a large el capitan run
270+
{ 10800, "mincrit", "0-11" },
253271
{ -1, NULL, NULL }
254272
};
255273

@@ -637,6 +655,60 @@ void test_binomial5 (void)
637655
topology_decref (topo);
638656
}
639657

658+
void test_mincrit5 (void)
659+
{
660+
struct topology *topo;
661+
flux_error_t error;
662+
663+
topo = topology_create ("mincrit:zz", 5, &error);
664+
if (!topo)
665+
diag ("%s", error.text);
666+
ok (topo == NULL,
667+
"mincrit topology fails with unknown path");
668+
669+
topo = topology_create ("mincrit:2", 5, &error);
670+
ok (topo != NULL,
671+
"mincrit:2 topology of size=5 works");
672+
673+
ok (topology_set_rank (topo, 1) == 0,
674+
"set rank to 1");
675+
ok (topology_get_parent (topo) == 0,
676+
"rank 1 parent is 0");
677+
ok (topology_get_child_ranks (topo, NULL, 0) == 1,
678+
"rank 1 has one child");
679+
ok (topology_get_level (topo) == 1,
680+
"rank 1 level is 1");
681+
682+
ok (topology_set_rank (topo, 2) == 0,
683+
"set rank to 2");
684+
ok (topology_get_parent (topo) == 0,
685+
"rank 2 parent is 0");
686+
ok (topology_get_child_ranks (topo, NULL, 0) == 1,
687+
"rank 2 has one child");
688+
ok (topology_get_level (topo) == 1,
689+
"rank 2 level is 1");
690+
691+
ok (topology_set_rank (topo, 3) == 0,
692+
"set rank to 3");
693+
ok (topology_get_parent (topo) == 1,
694+
"rank 3 parent is 1");
695+
ok (topology_get_child_ranks (topo, NULL, 0) == 0,
696+
"rank 3 has no children");
697+
ok (topology_get_level (topo) == 2,
698+
"rank 3 level is 2");
699+
700+
ok (topology_set_rank (topo, 4) == 0,
701+
"set rank to 4");
702+
ok (topology_get_parent (topo) == 2,
703+
"rank 4 parent is 2");
704+
ok (topology_get_child_ranks (topo, NULL, 0) == 0,
705+
"rank 4 has no children");
706+
ok (topology_get_level (topo) == 2,
707+
"rank 4 level is 2");
708+
709+
topology_decref (topo);
710+
}
711+
640712
int main (int argc, char *argv[])
641713
{
642714
plan (NO_PLAN);
@@ -650,6 +722,7 @@ int main (int argc, char *argv[])
650722
test_custom ();
651723
test_rank_aux ();
652724
test_binomial5 ();
725+
test_mincrit5 ();
653726

654727
done_testing ();
655728
}

src/broker/topology.c

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ struct topology {
4040
static int kary_plugin_init (struct topology *topo,
4141
const char *path,
4242
flux_error_t *error);
43+
static int mincrit_plugin_init (struct topology *topo,
44+
const char *path,
45+
flux_error_t *error);
4346
static int binomial_plugin_init (struct topology *topo,
4447
const char *path,
4548
flux_error_t *error);
@@ -49,6 +52,7 @@ static int custom_plugin_init (struct topology *topo,
4952

5053
static const struct topology_plugin builtin_plugins[] = {
5154
{ .name = "kary", .init = kary_plugin_init },
55+
{ .name = "mincrit", .init = mincrit_plugin_init },
5256
{ .name = "binomial", .init = binomial_plugin_init },
5357
{ .name = "custom", .init = custom_plugin_init },
5458
};
@@ -381,34 +385,33 @@ struct idset *topology_get_internal_ranks (struct topology *topo)
381385
return NULL;
382386
}
383387

384-
/* kary plugin
385-
*/
386-
static int parse_k (const char *s, int *result, flux_error_t *error)
388+
static int parse_k (const char *s, int *result)
387389
{
388390
char *endptr;
389391
unsigned long val;
390392

391393
if (!s || strlen (s) == 0)
392-
goto error;
394+
return -1;
393395
errno = 0;
394396
val = strtoul (s, &endptr, 10);
395397
if (errno != 0 || *endptr != '\0')
396-
goto error;
398+
return -1;
397399
*result = val;
398400
return 0;
399-
error:
400-
errprintf (error, "kary k value must be an integer >= 0");
401-
return -1;
402401
}
403402

403+
/* kary plugin
404+
*/
404405
static int kary_plugin_init (struct topology *topo,
405406
const char *path,
406407
flux_error_t *error)
407408
{
408409
int k;
409410

410-
if (parse_k (path, &k, error) < 0)
411+
if (parse_k (path, &k) < 0) {
412+
errprintf (error, "kary k value must be an integer >= 0");
411413
return -1;
414+
}
412415
if (k > 0) {
413416
for (int i = 0; i < topo->size; i++) {
414417
topo->node[i].parent = kary_parentof (k, i);
@@ -419,6 +422,70 @@ static int kary_plugin_init (struct topology *topo,
419422
return 0;
420423
}
421424

425+
/* Given size and k (number of routers), determine fanout from
426+
* routers to leaves.
427+
*/
428+
static int mincrit_router_fanout (int size, int k)
429+
{
430+
int crit = 1 + k;
431+
int leaves = size - crit;
432+
int fanout = leaves / k;
433+
if (leaves % k > 0)
434+
fanout++;
435+
return fanout;
436+
}
437+
438+
/* Choose a value for k that balances minimizing critical nodes and
439+
* keeping the fanout from routers to leaves at or below a threshold.
440+
* The height is always capped at 3, so max_fanout might be exceeded
441+
* from leader to routers for large size or small max_fanout.
442+
* Do choose k=0 (flat tree) if max_fanount can be met by the leader node.
443+
* Don't choose k=1, since that just pushes some router work off
444+
* to rank 1, without tree benefits.
445+
*/
446+
static int mincrit_choose_k (int size, int max_fanout)
447+
{
448+
int k = 0;
449+
if (size > max_fanout + 1) {
450+
k = 2;
451+
while (mincrit_router_fanout (size, k) > max_fanout)
452+
k++;
453+
}
454+
return k;
455+
}
456+
457+
/* mincrit plugin
458+
* A k-ary tree "squashed" down to at most three levels.
459+
* The value of k determines the fanout from leader to routers.
460+
* The number of nodes determines the fanout from routers to leaves.
461+
* The value of k may be 0, or be unspecifed (letting the system choose).
462+
*/
463+
static int mincrit_plugin_init (struct topology *topo,
464+
const char *path,
465+
flux_error_t *error)
466+
{
467+
int k;
468+
469+
if (path && strlen (path) > 0) {
470+
if (parse_k (path, &k) < 0) {
471+
errprintf (error, "mincrit k value must be an integer >= 0");
472+
return -1;
473+
}
474+
}
475+
else {
476+
k = mincrit_choose_k (topo->size, 1024);
477+
}
478+
/* N.B. topo is initialized with rank 0 as the parent of all other ranks
479+
* before plugin init is called, therefore only the leaves need to have
480+
* their parent set here.
481+
*/
482+
if (k > 0) {
483+
for (int i = k + 1; i < topo->size; i++)
484+
topo->node[i].parent = (i - k - 1) % k + 1;
485+
}
486+
return 0;
487+
}
488+
422489
/* binomial plugin
423490
*/
424491
static int binomial_smallest_k (int size)

t/t0019-tbon-config.t

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,4 +168,23 @@ test_expect_success 'broker -Stbon.topo=binomial option works' '
168168
flux getattr tbon.topo >topo_binomial.out &&
169169
test_cmp topo_binomial.exp topo_binomial.out
170170
'
171+
test_expect_success 'broker -Stbon.topo=mincrit option works' '
172+
echo mincrit >topo_mincrit.exp &&
173+
flux start ${ARGS} -Stbon.topo=mincrit \
174+
flux getattr tbon.topo >topo_mincrit.out &&
175+
test_cmp topo_mincrit.exp topo_mincrit.out
176+
'
177+
test_expect_success 'broker -Stbon.topo=mincrit:2 option works' '
178+
echo 2 >topo_crit2.exp &&
179+
flux start -s5 ${ARGS} -Stbon.topo=mincrit:2 \
180+
flux getattr tbon.maxlevel >topo_crit2.out &&
181+
test_cmp topo_crit2.exp topo_crit2.out
182+
'
183+
test_expect_success 'broker -Stbon.topo=mincrit is flat for small size' '
184+
echo 1 >topo_critsmall.exp &&
185+
flux start -s5 ${ARGS} -Stbon.topo=mincrit \
186+
flux getattr tbon.maxlevel >topo_critsmall.out &&
187+
test_cmp topo_critsmall.exp topo_critsmall.out
188+
'
189+
171190
test_done

0 commit comments

Comments
 (0)