Skip to content

Commit ba3c3be

Browse files
committed
broker: allow instance size > PMI bootstrap size
Problem: there is no way to bootstrap a Flux instance using PMI when some ranks are (initially) missing. Allow the 'size' broker attribute to be set on the command line. If it is set to a value greater than the PMI size, perform the PMI exchange as usual with the PMI size, but configure the overlay topology with the additional ranks. Since 'hostlist' is an immutable attribute that is expected to be set by the bootstrap implementation, set it to include placeholders ("extra[0-N]") for the ranks that haven't connected yet, so we get something other than "(null)" in the logs.
1 parent 853fc77 commit ba3c3be

File tree

2 files changed

+51
-18
lines changed

2 files changed

+51
-18
lines changed

src/broker/boot_pmi.c

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7676
}
7777

7878
/* Set broker.mapping attribute from enclosing instance taskmap.
79+
* Skip setting the taskmap if extra_ranks is nonzero since the
80+
* mapping of those ranks is unknown. N.B. when broker.mapping is missing,
81+
* use_ipc() below will return false, a good thing since it is unknown
82+
* whether ipc:// would work for the TBON wire-up of extra nodes.
7983
*/
8084
static int set_broker_mapping_attr (struct upmi *upmi,
8185
int size,
82-
attr_t *attrs)
86+
attr_t *attrs,
87+
int extra_ranks)
8388
{
8489
char *val = NULL;
8590
int rc;
8691

8792
if (size == 1)
8893
val = strdup ("[[0,1,1,1]]");
89-
else {
94+
else if (extra_ranks == 0) {
9095
/* First attempt to get flux.taskmap, falling back to
9196
* PMI_process_mapping if this key is not available.
9297
*/
@@ -164,7 +169,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
164169
return -1;
165170
}
166171

167-
static int set_hostlist_attr (attr_t *attrs, struct hostlist *hl)
172+
static int set_hostlist_attr (attr_t *attrs,struct hostlist *hl)
168173
{
169174
const char *value;
170175
char *s;
@@ -220,9 +225,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
220225
int child_count;
221226
int *child_ranks = NULL;
222227
const char *uri;
223-
int i;
224228
int upmi_flags = UPMI_LIBPMI_NOFLUX;
225229
const char *upmi_method;
230+
const char *s;
231+
int size;
226232

227233
// N.B. overlay_create() sets the tbon.topo attribute
228234
if (attr_get (attrs, "tbon.topo", &topo_uri, NULL) < 0) {
@@ -249,11 +255,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
249255
log_err ("set_instance_level_attr");
250256
goto error;
251257
}
252-
if (set_broker_mapping_attr (upmi, info.size, attrs) < 0) {
258+
/* Allow the PMI size to be overridden with a larger one so that
259+
* additional ranks can be grafted on later.
260+
*/
261+
size = info.size;
262+
if (attr_get (attrs, "size", &s, NULL) == 0) {
263+
errno = 0;
264+
size = strtoul (s, NULL, 10);
265+
if (errno != 0 || size <= info.size) {
266+
log_msg ("instance size may only be increased");
267+
goto error;
268+
}
269+
}
270+
if (set_broker_mapping_attr (upmi, size, attrs, size - info.size) < 0) {
253271
log_err ("error setting broker.mapping attribute");
254272
goto error;
255273
}
256-
if (!(topo = topology_create (topo_uri, info.size, &error))) {
274+
if (!(topo = topology_create (topo_uri, size, &error))) {
257275
log_msg ("error creating '%s' topology: %s", topo_uri, error.text);
258276
goto error;
259277
}
@@ -269,16 +287,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
269287
goto error;
270288
}
271289

272-
/* A size=1 instance has no peers, so skip the PMI exchange.
273-
*/
274-
if (info.size == 1) {
275-
if (hostlist_append (hl, hostname) < 0) {
276-
log_err ("hostlist_append");
277-
goto error;
278-
}
279-
goto done;
280-
}
281-
282290
/* Enable ipv6 for maximum flexibility in address selection.
283291
*/
284292
overlay_set_ipv6 (overlay, 1);
@@ -310,6 +318,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
310318
goto error;
311319
}
312320

321+
/* If the PMI size is 1, then skip the PMI exchange entirely.
322+
*/
323+
if (info.size == 1) {
324+
if (hostlist_append (hl, hostname) < 0) {
325+
log_err ("hostlist_append");
326+
goto error;
327+
}
328+
goto done;
329+
}
330+
313331
/* Each broker writes a "business card" consisting of hostname,
314332
* public key, and URI (empty string for leaf node).
315333
*/
@@ -381,10 +399,13 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
381399

382400
/* Fetch the business card of children and inform overlay of public keys.
383401
*/
384-
for (i = 0; i < child_count; i++) {
402+
for (int i = 0; i < child_count; i++) {
385403
const char *peer_pubkey;
386404
int child_rank = child_ranks[i];
387405

406+
if (child_rank >= info.size)
407+
break;
408+
388409
if (snprintf (key, sizeof (key), "%d", child_rank) >= sizeof (key)) {
389410
log_msg ("pmi key string overflow");
390411
goto error;
@@ -413,7 +434,7 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
413434
/* Fetch the business card of all ranks and build hostlist.
414435
* The hostlist is built independently (and in parallel) on all ranks.
415436
*/
416-
for (i = 0; i < info.size; i++) {
437+
for (int i = 0; i < info.size; i++) {
417438
const char *peer_hostname;
418439

419440
if (snprintf (key, sizeof (key), "%d", i) >= sizeof (key)) {
@@ -450,6 +471,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
450471
}
451472

452473
done:
474+
/* If the instance size is greater than the PMI size, add placeholder
475+
* names to the hostlist for the ranks that haven't joined yet.
476+
*/
477+
for (int i = info.size; i < size; i++) {
478+
char buf[64];
479+
snprintf (buf, sizeof (buf), "extra%d", i - info.size);
480+
if (hostlist_append (hl, buf) < 0) {
481+
log_err ("hostlist_append");
482+
goto error;
483+
}
484+
}
453485
if (set_hostlist_attr (attrs, hl) < 0) {
454486
log_err ("setattr hostlist");
455487
goto error;

src/broker/overlay.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,7 @@ int overlay_register_attrs (struct overlay *overlay)
14541454
overlay->rank,
14551455
ATTR_IMMUTABLE) < 0)
14561456
return -1;
1457+
(void)attr_delete (overlay->attrs, "size", true);
14571458
if (attr_add_uint32 (overlay->attrs,
14581459
"size", overlay->size,
14591460
ATTR_IMMUTABLE) < 0)

0 commit comments

Comments
 (0)