@@ -76,17 +76,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7676}
7777
7878/* Set broker.mapping attribute from enclosing instance taskmap.
79+ * Skip setting the taskmap if extra_ranks is nonzero since the
80+ * mapping of those ranks is unknown. N.B. when broker.mapping is missing,
81+ * use_ipc() below will return false, a good thing since it is unknown
82+ * whether ipc:// would work for the TBON wire-up of extra nodes.
7983 */
8084static int set_broker_mapping_attr (struct upmi * upmi ,
8185 int size ,
82- attr_t * attrs )
86+ attr_t * attrs ,
87+ int extra_ranks )
8388{
8489 char * val = NULL ;
8590 int rc ;
8691
8792 if (size == 1 )
8893 val = strdup ("[[0,1,1,1]]" );
89- else {
94+ else if ( extra_ranks == 0 ) {
9095 /* First attempt to get flux.taskmap, falling back to
9196 * PMI_process_mapping if this key is not available.
9297 */
@@ -164,7 +169,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
164169 return -1 ;
165170}
166171
167- static int set_hostlist_attr (attr_t * attrs , struct hostlist * hl )
172+ static int set_hostlist_attr (attr_t * attrs ,struct hostlist * hl )
168173{
169174 const char * value ;
170175 char * s ;
@@ -223,6 +228,8 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
223228 int i ;
224229 int upmi_flags = UPMI_LIBPMI_NOFLUX ;
225230 const char * upmi_method ;
231+ const char * s ;
232+ int size ;
226233
227234 // N.B. overlay_create() sets the tbon.topo attribute
228235 if (attr_get (attrs , "tbon.topo" , & topo_uri , NULL ) < 0 ) {
@@ -249,11 +256,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
249256 log_err ("set_instance_level_attr" );
250257 goto error ;
251258 }
252- if (set_broker_mapping_attr (upmi , info .size , attrs ) < 0 ) {
259+ /* Allow the PMI size to be overridden with a larger one so that
260+ * additional ranks can be grafted on later.
261+ */
262+ size = info .size ;
263+ if (attr_get (attrs , "size" , & s , NULL ) == 0 ) {
264+ errno = 0 ;
265+ size = strtoul (s , NULL , 10 );
266+ if (errno != 0 || size <= info .size ) {
267+ log_msg ("instance size may only be increased" );
268+ goto error ;
269+ }
270+ }
271+ if (set_broker_mapping_attr (upmi , size , attrs , size - info .size ) < 0 ) {
253272 log_err ("error setting broker.mapping attribute" );
254273 goto error ;
255274 }
256- if (!(topo = topology_create (topo_uri , info . size , & error ))) {
275+ if (!(topo = topology_create (topo_uri , size , & error ))) {
257276 log_msg ("error creating '%s' topology: %s" , topo_uri , error .text );
258277 goto error ;
259278 }
@@ -269,16 +288,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
269288 goto error ;
270289 }
271290
272- /* A size=1 instance has no peers, so skip the PMI exchange.
273- */
274- if (info .size == 1 ) {
275- if (hostlist_append (hl , hostname ) < 0 ) {
276- log_err ("hostlist_append" );
277- goto error ;
278- }
279- goto done ;
280- }
281-
282291 /* Enable ipv6 for maximum flexibility in address selection.
283292 */
284293 overlay_set_ipv6 (overlay , 1 );
@@ -310,6 +319,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
310319 goto error ;
311320 }
312321
322+ /* If the PMI size is 1, then skip the PMI exchange entirely.
323+ */
324+ if (info .size == 1 ) {
325+ if (hostlist_append (hl , hostname ) < 0 ) {
326+ log_err ("hostlist_append" );
327+ goto error ;
328+ }
329+ goto done ;
330+ }
331+
313332 /* Each broker writes a "business card" consisting of hostname,
314333 * public key, and URI (empty string for leaf node).
315334 */
@@ -385,6 +404,9 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
385404 const char * peer_pubkey ;
386405 int child_rank = child_ranks [i ];
387406
407+ if (child_rank >= info .size )
408+ break ;
409+
388410 if (snprintf (key , sizeof (key ), "%d" , child_rank ) >= sizeof (key )) {
389411 log_msg ("pmi key string overflow" );
390412 goto error ;
@@ -450,6 +472,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
450472 }
451473
452474done :
475+ /* If the instance size is greater than the PMI size, add placeholder
476+ * names to the hostlist for the ranks that haven't joined yet.
477+ */
478+ for (int i = info .size ; i < size ; i ++ ) {
479+ char buf [64 ];
480+ snprintf (buf , sizeof (buf ), "extra%d" , i - info .size );
481+ if (hostlist_append (hl , buf ) < 0 ) {
482+ log_err ("hostlist_append" );
483+ goto error ;
484+ }
485+ }
453486 if (set_hostlist_attr (attrs , hl ) < 0 ) {
454487 log_err ("setattr hostlist" );
455488 goto error ;
0 commit comments