Skip to content

Commit e9573d6

Browse files
committed
broker: allow instance size > PMI bootstrap size
Problem: there is no way to bootstrap a Flux instance using PMI when some ranks are (initially) missing. Allow the 'size' broker attribute to be set on the command line. If it is set to a value greater than the PMI size, perform the PMI exchange as usual with the PMI size, but configure the overlay topology with the additional ranks. Since 'hostlist' is an immutable attribute that is expected to be set by the bootstrap implementation, set it to include placeholder names of the form "extra[0-N]" for the ranks that haven't connected yet, so that the logs show something other than "(null)" for those ranks.
1 parent 2ffd96b commit e9573d6

File tree

2 files changed

+49
-15
lines changed

2 files changed

+49
-15
lines changed

src/broker/boot_pmi.c

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7676
}
7777

7878
/* Set broker.mapping attribute from enclosing instance taskmap.
79+
* Skip setting the taskmap if extra_ranks is nonzero since the
80+
* mapping of those ranks is unknown. N.B. when broker.mapping is missing,
81+
* use_ipc() below will return false, a good thing since it is unknown
82+
* whether ipc:// would work for the TBON wire-up of extra nodes.
7983
*/
8084
static int set_broker_mapping_attr (struct upmi *upmi,
8185
int size,
82-
attr_t *attrs)
86+
attr_t *attrs,
87+
int extra_ranks)
8388
{
8489
char *val = NULL;
8590
int rc;
8691

8792
if (size == 1)
8893
val = strdup ("[[0,1,1,1]]");
89-
else {
94+
else if (extra_ranks == 0) {
9095
/* First attempt to get flux.taskmap, falling back to
9196
* PMI_process_mapping if this key is not available.
9297
*/
@@ -164,7 +169,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
164169
return -1;
165170
}
166171

167-
static int set_hostlist_attr (attr_t *attrs, struct hostlist *hl)
172+
static int set_hostlist_attr (attr_t *attrs,struct hostlist *hl)
168173
{
169174
const char *value;
170175
char *s;
@@ -223,6 +228,8 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
223228
int i;
224229
int upmi_flags = UPMI_LIBPMI_NOFLUX;
225230
const char *upmi_method;
231+
const char *s;
232+
int size;
226233

227234
// N.B. overlay_create() sets the tbon.topo attribute
228235
if (attr_get (attrs, "tbon.topo", &topo_uri, NULL) < 0) {
@@ -249,11 +256,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
249256
log_err ("set_instance_level_attr");
250257
goto error;
251258
}
252-
if (set_broker_mapping_attr (upmi, info.size, attrs) < 0) {
259+
/* Allow the PMI size to be overridden with a larger one so that
260+
* additional ranks can be grafted on later.
261+
*/
262+
size = info.size;
263+
if (attr_get (attrs, "size", &s, NULL) == 0) {
264+
errno = 0;
265+
size = strtoul (s, NULL, 10);
266+
if (errno != 0 || size <= info.size) {
267+
log_msg ("instance size may only be increased");
268+
goto error;
269+
}
270+
}
271+
if (set_broker_mapping_attr (upmi, size, attrs, size - info.size) < 0) {
253272
log_err ("error setting broker.mapping attribute");
254273
goto error;
255274
}
256-
if (!(topo = topology_create (topo_uri, info.size, &error))) {
275+
if (!(topo = topology_create (topo_uri, size, &error))) {
257276
log_msg ("error creating '%s' topology: %s", topo_uri, error.text);
258277
goto error;
259278
}
@@ -269,16 +288,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
269288
goto error;
270289
}
271290

272-
/* A size=1 instance has no peers, so skip the PMI exchange.
273-
*/
274-
if (info.size == 1) {
275-
if (hostlist_append (hl, hostname) < 0) {
276-
log_err ("hostlist_append");
277-
goto error;
278-
}
279-
goto done;
280-
}
281-
282291
/* Enable ipv6 for maximum flexibility in address selection.
283292
*/
284293
overlay_set_ipv6 (overlay, 1);
@@ -310,6 +319,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
310319
goto error;
311320
}
312321

322+
/* If the PMI size is 1, then skip the PMI exchange entirely.
323+
*/
324+
if (info.size == 1) {
325+
if (hostlist_append (hl, hostname) < 0) {
326+
log_err ("hostlist_append");
327+
goto error;
328+
}
329+
goto done;
330+
}
331+
313332
/* Each broker writes a "business card" consisting of hostname,
314333
* public key, and URI (empty string for leaf node).
315334
*/
@@ -385,6 +404,9 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
385404
const char *peer_pubkey;
386405
int child_rank = child_ranks[i];
387406

407+
if (child_rank >= info.size)
408+
break;
409+
388410
if (snprintf (key, sizeof (key), "%d", child_rank) >= sizeof (key)) {
389411
log_msg ("pmi key string overflow");
390412
goto error;
@@ -450,6 +472,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
450472
}
451473

452474
done:
475+
/* If the instance size is greater than the PMI size, add placeholder
476+
* names to the hostlist for the ranks that haven't joined yet.
477+
*/
478+
for (int i = info.size; i < size; i++) {
479+
char buf[64];
480+
snprintf (buf, sizeof (buf), "extra%d", i - info.size);
481+
if (hostlist_append (hl, buf) < 0) {
482+
log_err ("hostlist_append");
483+
goto error;
484+
}
485+
}
453486
if (set_hostlist_attr (attrs, hl) < 0) {
454487
log_err ("setattr hostlist");
455488
goto error;

src/broker/overlay.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,7 @@ int overlay_register_attrs (struct overlay *overlay)
14541454
overlay->rank,
14551455
ATTR_IMMUTABLE) < 0)
14561456
return -1;
1457+
(void)attr_delete (overlay->attrs, "size", true);
14571458
if (attr_add_uint32 (overlay->attrs,
14581459
"size", overlay->size,
14591460
ATTR_IMMUTABLE) < 0)

0 commit comments

Comments
 (0)