Skip to content

Commit 2e9f9f1

Browse files
committed
broker: allow instance size > PMI bootstrap size
Problem: there is no way to bootstrap a Flux instance using PMI when some ranks are (initially) missing. Allow the 'size' broker attribute to be set on the command line. If it is set to a value greater than the PMI size, perform the PMI exchange as usual with the PMI size, but configure the overlay topology with the additional ranks. Since 'hostlist' is an immutable attribute that is expected to be set by the bootstrap implementation, set it to include placeholders ("extra[0-N]") for the ranks that haven't connected yet, so that we get something other than "(null)" in the logs.
1 parent 7ee4e69 commit 2e9f9f1

File tree

2 files changed

+51
-18
lines changed

2 files changed

+51
-18
lines changed

src/broker/boot_pmi.c

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7575
}
7676

7777
/* Set broker.mapping attribute from enclosing instance taskmap.
78+
* Skip setting the taskmap if extra_ranks is nonzero since the
79+
* mapping of those ranks is unknown. N.B. when broker.mapping is missing,
80+
* use_ipc() below will return false, a good thing since it is unknown
81+
* whether ipc:// would work for the TBON wire-up of extra nodes.
7882
*/
7983
static int set_broker_mapping_attr (struct upmi *upmi,
8084
int size,
81-
attr_t *attrs)
85+
attr_t *attrs,
86+
int extra_ranks)
8287
{
8388
char *val = NULL;
8489
int rc;
8590

8691
if (size == 1)
8792
val = strdup ("[[0,1,1,1]]");
88-
else {
93+
else if (extra_ranks == 0) {
8994
/* First attempt to get flux.taskmap, falling back to
9095
* PMI_process_mapping if this key is not available.
9196
*/
@@ -163,7 +168,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
163168
return -1;
164169
}
165170

166-
static int set_hostlist_attr (attr_t *attrs, struct hostlist *hl)
171+
static int set_hostlist_attr (attr_t *attrs,struct hostlist *hl)
167172
{
168173
const char *value;
169174
char *s;
@@ -220,9 +225,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
220225
int child_count;
221226
int *child_ranks = NULL;
222227
const char *uri;
223-
int i;
224228
int upmi_flags = UPMI_LIBPMI_NOFLUX;
225229
const char *upmi_method;
230+
const char *s;
231+
int size;
226232

227233
// N.B. overlay_create() sets the tbon.topo attribute
228234
if (attr_get (attrs, "tbon.topo", &topo_uri, NULL) < 0) {
@@ -249,11 +255,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
249255
log_err ("set_instance_level_attr");
250256
goto error;
251257
}
252-
if (set_broker_mapping_attr (upmi, info.size, attrs) < 0) {
258+
/* Allow the PMI size to be overridden with a larger one so that
259+
* additional ranks can be grafted on later.
260+
*/
261+
size = info.size;
262+
if (attr_get (attrs, "size", &s, NULL) == 0) {
263+
errno = 0;
264+
size = strtoul (s, NULL, 10);
265+
if (errno != 0 || size <= info.size) {
266+
log_msg ("instance size may only be increased");
267+
goto error;
268+
}
269+
}
270+
if (set_broker_mapping_attr (upmi, size, attrs, size - info.size) < 0) {
253271
log_err ("error setting broker.mapping attribute");
254272
goto error;
255273
}
256-
if (!(topo = topology_create (topo_uri, info.size, &error))) {
274+
if (!(topo = topology_create (topo_uri, size, &error))) {
257275
log_msg ("error creating '%s' topology: %s", topo_uri, error.text);
258276
goto error;
259277
}
@@ -269,16 +287,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
269287
goto error;
270288
}
271289

272-
/* A size=1 instance has no peers, so skip the PMI exchange.
273-
*/
274-
if (info.size == 1) {
275-
if (hostlist_append (hl, hostname) < 0) {
276-
log_err ("hostlist_append");
277-
goto error;
278-
}
279-
goto done;
280-
}
281-
282290
/* Enable ipv6 for maximum flexibility in address selection.
283291
*/
284292
overlay_set_ipv6 (overlay, 1);
@@ -310,6 +318,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
310318
goto error;
311319
}
312320

321+
/* If the PMI size is 1, then skip the PMI exchange entirely.
322+
*/
323+
if (info.size == 1) {
324+
if (hostlist_append (hl, hostname) < 0) {
325+
log_err ("hostlist_append");
326+
goto error;
327+
}
328+
goto done;
329+
}
330+
313331
/* Each broker writes a "business card" consisting of hostname,
314332
* public key, and URI (empty string for leaf node).
315333
*/
@@ -390,10 +408,13 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
390408

391409
/* Fetch the business card of children and inform overlay of public keys.
392410
*/
393-
for (i = 0; i < child_count; i++) {
411+
for (int i = 0; i < child_count; i++) {
394412
const char *peer_pubkey;
395413
int child_rank = child_ranks[i];
396414

415+
if (child_rank >= info.size)
416+
break;
417+
397418
if (snprintf (key, sizeof (key), "%d", child_rank) >= sizeof (key)) {
398419
log_msg ("pmi key string overflow");
399420
goto error;
@@ -428,7 +449,7 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
428449
/* Fetch the business card of all ranks and build hostlist.
429450
* The hostlist is built independently (and in parallel) on all ranks.
430451
*/
431-
for (i = 0; i < info.size; i++) {
452+
for (int i = 0; i < info.size; i++) {
432453
const char *peer_hostname;
433454

434455
if (snprintf (key, sizeof (key), "%d", i) >= sizeof (key)) {
@@ -471,6 +492,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
471492
}
472493

473494
done:
495+
/* If the instance size is greater than the PMI size, add placeholder
496+
* names to the hostlist for the ranks that haven't joined yet.
497+
*/
498+
for (int i = info.size; i < size; i++) {
499+
char buf[64];
500+
snprintf (buf, sizeof (buf), "extra%d", i - info.size);
501+
if (hostlist_append (hl, buf) < 0) {
502+
log_err ("hostlist_append");
503+
goto error;
504+
}
505+
}
474506
if (set_hostlist_attr (attrs, hl) < 0) {
475507
log_err ("setattr hostlist");
476508
goto error;

src/broker/overlay.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,7 @@ int overlay_register_attrs (struct overlay *overlay)
15901590
overlay->rank,
15911591
ATTR_IMMUTABLE) < 0)
15921592
return -1;
1593+
(void)attr_delete (overlay->attrs, "size", true);
15931594
if (attr_add_uint32 (overlay->attrs,
15941595
"size", overlay->size,
15951596
ATTR_IMMUTABLE) < 0)

0 commit comments

Comments
 (0)