Skip to content

Commit 89d7a8d

Browse files
committed
broker: allow instance size > PMI bootstrap size
Problem: there is no way to bootstrap a Flux instance using PMI when some ranks are (initially) missing. Allow the 'size' broker attribute to be set on the command line. If it is set to a value greater than the PMI size, perform the PMI exchange as usual with the PMI size, but configure the overlay topology with the additional ranks. Since 'hostlist' is an immutable attribute that is expected to be set by the bootstrap implementation, set it to include placeholders ("extra[0-N]") for the ranks that haven't connected yet, so that we get something other than "(null)" in the logs.
1 parent 359cd1a commit 89d7a8d

File tree

2 files changed

+51
-18
lines changed

2 files changed

+51
-18
lines changed

src/broker/boot_pmi.c

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7575
}
7676

7777
/* Set broker.mapping attribute from enclosing instance taskmap.
78+
* Skip setting the taskmap if extra_ranks is nonzero since the
79+
* mapping of those ranks is unknown. N.B. when broker.mapping is missing,
80+
* use_ipc() below will return false, a good thing since it is unknown
81+
* whether ipc:// would work for the TBON wire-up of extra nodes.
7882
*/
7983
static int set_broker_mapping_attr (struct upmi *upmi,
8084
int size,
81-
attr_t *attrs)
85+
attr_t *attrs,
86+
int extra_ranks)
8287
{
8388
char *val = NULL;
8489
int rc;
8590

8691
if (size == 1)
8792
val = strdup ("[[0,1,1,1]]");
88-
else {
93+
else if (extra_ranks == 0) {
8994
/* First attempt to get flux.taskmap, falling back to
9095
* PMI_process_mapping if this key is not available.
9196
*/
@@ -163,7 +168,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
163168
return -1;
164169
}
165170

166-
static int set_hostlist_attr (attr_t *attrs, struct hostlist *hl)
171+
static int set_hostlist_attr (attr_t *attrs,struct hostlist *hl)
167172
{
168173
const char *value;
169174
char *s;
@@ -219,9 +224,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
219224
int child_count;
220225
int *child_ranks = NULL;
221226
const char *uri;
222-
int i;
223227
int upmi_flags = UPMI_LIBPMI_NOFLUX;
224228
const char *upmi_method;
229+
const char *s;
230+
int size;
225231

226232
// N.B. overlay_create() sets the tbon.topo attribute
227233
if (attr_get (attrs, "tbon.topo", &topo_uri, NULL) < 0) {
@@ -248,11 +254,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
248254
log_err ("set_instance_level_attr");
249255
goto error;
250256
}
251-
if (set_broker_mapping_attr (upmi, info.size, attrs) < 0) {
257+
/* Allow the PMI size to be overridden with a larger one so that
258+
* additional ranks can be grafted on later.
259+
*/
260+
size = info.size;
261+
if (attr_get (attrs, "size", &s, NULL) == 0) {
262+
errno = 0;
263+
size = strtoul (s, NULL, 10);
264+
if (errno != 0 || size <= info.size) {
265+
log_msg ("instance size may only be increased");
266+
goto error;
267+
}
268+
}
269+
if (set_broker_mapping_attr (upmi, size, attrs, size - info.size) < 0) {
252270
log_err ("error setting broker.mapping attribute");
253271
goto error;
254272
}
255-
if (!(topo = topology_create (topo_uri, info.size, &error))) {
273+
if (!(topo = topology_create (topo_uri, size, &error))) {
256274
log_msg ("error creating '%s' topology: %s", topo_uri, error.text);
257275
goto error;
258276
}
@@ -268,16 +286,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
268286
goto error;
269287
}
270288

271-
/* A size=1 instance has no peers, so skip the PMI exchange.
272-
*/
273-
if (info.size == 1) {
274-
if (hostlist_append (hl, hostname) < 0) {
275-
log_err ("hostlist_append");
276-
goto error;
277-
}
278-
goto done;
279-
}
280-
281289
/* Enable ipv6 for maximum flexibility in address selection.
282290
*/
283291
overlay_set_ipv6 (overlay, 1);
@@ -309,6 +317,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
309317
goto error;
310318
}
311319

320+
/* If the PMI size is 1, then skip the PMI exchange entirely.
321+
*/
322+
if (info.size == 1) {
323+
if (hostlist_append (hl, hostname) < 0) {
324+
log_err ("hostlist_append");
325+
goto error;
326+
}
327+
goto done;
328+
}
329+
312330
/* Each broker writes a "business card" consisting of hostname,
313331
* public key, and URI (empty string for leaf node).
314332
*/
@@ -380,10 +398,13 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
380398

381399
/* Fetch the business card of children and inform overlay of public keys.
382400
*/
383-
for (i = 0; i < child_count; i++) {
401+
for (int i = 0; i < child_count; i++) {
384402
const char *peer_pubkey;
385403
int child_rank = child_ranks[i];
386404

405+
if (child_rank >= info.size)
406+
break;
407+
387408
if (snprintf (key, sizeof (key), "%d", child_rank) >= sizeof (key)) {
388409
log_msg ("pmi key string overflow");
389410
goto error;
@@ -412,7 +433,7 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
412433
/* Fetch the business card of all ranks and build hostlist.
413434
* The hostlist is built independently (and in parallel) on all ranks.
414435
*/
415-
for (i = 0; i < info.size; i++) {
436+
for (int i = 0; i < info.size; i++) {
416437
const char *peer_hostname;
417438

418439
if (snprintf (key, sizeof (key), "%d", i) >= sizeof (key)) {
@@ -449,6 +470,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
449470
}
450471

451472
done:
473+
/* If the instance size is greater than the PMI size, add placeholder
474+
* names to the hostlist for the ranks that haven't joined yet.
475+
*/
476+
for (int i = info.size; i < size; i++) {
477+
char buf[64];
478+
snprintf (buf, sizeof (buf), "extra%d", i - info.size);
479+
if (hostlist_append (hl, buf) < 0) {
480+
log_err ("hostlist_append");
481+
goto error;
482+
}
483+
}
452484
if (set_hostlist_attr (attrs, hl) < 0) {
453485
log_err ("setattr hostlist");
454486
goto error;

src/broker/overlay.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1455,6 +1455,7 @@ int overlay_register_attrs (struct overlay *overlay)
14551455
overlay->rank,
14561456
ATTR_IMMUTABLE) < 0)
14571457
return -1;
1458+
(void)attr_delete (overlay->attrs, "size", true);
14581459
if (attr_add_uint32 (overlay->attrs,
14591460
"size", overlay->size,
14601461
ATTR_IMMUTABLE) < 0)

0 commit comments

Comments
 (0)