@@ -75,17 +75,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7575}
7676
7777/* Set broker.mapping attribute from enclosing instance taskmap.
78+ * Skip setting the taskmap if extra_ranks is nonzero since the
79+ * mapping of those ranks is unknown. N.B. when broker.mapping is missing,
80+ * use_ipc() below will return false, a good thing since it is unknown
81+ * whether ipc:// would work for the TBON wire-up of extra nodes.
7882 */
7983static int set_broker_mapping_attr (struct upmi * upmi ,
8084 int size ,
81- attr_t * attrs )
85+ attr_t * attrs ,
86+ int extra_ranks )
8287{
8388 char * val = NULL ;
8489 int rc ;
8590
8691 if (size == 1 )
8792 val = strdup ("[[0,1,1,1]]" );
88- else {
93+ else if ( extra_ranks == 0 ) {
8994 /* First attempt to get flux.taskmap, falling back to
9095 * PMI_process_mapping if this key is not available.
9196 */
@@ -163,7 +168,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
163168 return -1 ;
164169}
165170
166- static int set_hostlist_attr (attr_t * attrs , struct hostlist * hl )
171+ static int set_hostlist_attr (attr_t * attrs ,struct hostlist * hl )
167172{
168173 const char * value ;
169174 char * s ;
@@ -219,9 +224,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
219224 int child_count ;
220225 int * child_ranks = NULL ;
221226 const char * uri ;
222- int i ;
223227 int upmi_flags = UPMI_LIBPMI_NOFLUX ;
224228 const char * upmi_method ;
229+ const char * s ;
230+ int size ;
225231
226232 // N.B. overlay_create() sets the tbon.topo attribute
227233 if (attr_get (attrs , "tbon.topo" , & topo_uri , NULL ) < 0 ) {
@@ -248,11 +254,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
248254 log_err ("set_instance_level_attr" );
249255 goto error ;
250256 }
251- if (set_broker_mapping_attr (upmi , info .size , attrs ) < 0 ) {
257+ /* Allow the PMI size to be overridden with a larger one so that
258+ * additional ranks can be grafted on later.
259+ */
260+ size = info .size ;
261+ if (attr_get (attrs , "size" , & s , NULL ) == 0 ) {
262+ errno = 0 ;
263+ size = strtoul (s , NULL , 10 );
264+ if (errno != 0 || size <= info .size ) {
265+ log_msg ("instance size may only be increased" );
266+ goto error ;
267+ }
268+ }
269+ if (set_broker_mapping_attr (upmi , size , attrs , size - info .size ) < 0 ) {
252270 log_err ("error setting broker.mapping attribute" );
253271 goto error ;
254272 }
255- if (!(topo = topology_create (topo_uri , info . size , & error ))) {
273+ if (!(topo = topology_create (topo_uri , size , & error ))) {
256274 log_msg ("error creating '%s' topology: %s" , topo_uri , error .text );
257275 goto error ;
258276 }
@@ -268,16 +286,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
268286 goto error ;
269287 }
270288
271- /* A size=1 instance has no peers, so skip the PMI exchange.
272- */
273- if (info .size == 1 ) {
274- if (hostlist_append (hl , hostname ) < 0 ) {
275- log_err ("hostlist_append" );
276- goto error ;
277- }
278- goto done ;
279- }
280-
281289 /* Enable ipv6 for maximum flexibility in address selection.
282290 */
283291 overlay_set_ipv6 (overlay , 1 );
@@ -309,6 +317,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
309317 goto error ;
310318 }
311319
320+ /* If the PMI size is 1, then skip the PMI exchange entirely.
321+ */
322+ if (info .size == 1 ) {
323+ if (hostlist_append (hl , hostname ) < 0 ) {
324+ log_err ("hostlist_append" );
325+ goto error ;
326+ }
327+ goto done ;
328+ }
329+
312330 /* Each broker writes a "business card" consisting of hostname,
313331 * public key, and URI (empty string for leaf node).
314332 */
@@ -380,10 +398,13 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
380398
381399 /* Fetch the business card of children and inform overlay of public keys.
382400 */
383- for (i = 0 ; i < child_count ; i ++ ) {
401+ for (int i = 0 ; i < child_count ; i ++ ) {
384402 const char * peer_pubkey ;
385403 int child_rank = child_ranks [i ];
386404
405+ if (child_rank >= info .size )
406+ break ;
407+
387408 if (snprintf (key , sizeof (key ), "%d" , child_rank ) >= sizeof (key )) {
388409 log_msg ("pmi key string overflow" );
389410 goto error ;
@@ -412,7 +433,7 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
412433 /* Fetch the business card of all ranks and build hostlist.
413434 * The hostlist is built independently (and in parallel) on all ranks.
414435 */
415- for (i = 0 ; i < info .size ; i ++ ) {
436+ for (int i = 0 ; i < info .size ; i ++ ) {
416437 const char * peer_hostname ;
417438
418439 if (snprintf (key , sizeof (key ), "%d" , i ) >= sizeof (key )) {
@@ -449,6 +470,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
449470 }
450471
451472done :
473+ /* If the instance size is greater than the PMI size, add placeholder
474+ * names to the hostlist for the ranks that haven't joined yet.
475+ */
476+ for (int i = info .size ; i < size ; i ++ ) {
477+ char buf [64 ];
478+ snprintf (buf , sizeof (buf ), "extra%d" , i - info .size );
479+ if (hostlist_append (hl , buf ) < 0 ) {
480+ log_err ("hostlist_append" );
481+ goto error ;
482+ }
483+ }
452484 if (set_hostlist_attr (attrs , hl ) < 0 ) {
453485 log_err ("setattr hostlist" );
454486 goto error ;
0 commit comments