@@ -76,17 +76,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7676}
7777
7878/* Set broker.mapping attribute from enclosing instance taskmap.
79+ * Skip setting broker.mapping if extra_ranks is nonzero since the
80+ * mapping of those ranks is unknown. N.B. when broker.mapping is missing,
81+ * use_ipc() below will return false, a good thing since it is unknown
82+ * whether ipc:// would work for the TBON wire-up of extra nodes.
7983 */
8084static int set_broker_mapping_attr (struct upmi * upmi ,
8185 int size ,
82- attr_t * attrs )
86+ attr_t * attrs ,
87+ int extra_ranks )
8388{
8489 char * val = NULL ;
8590 int rc ;
8691
8792 if (size == 1 )
8893 val = strdup ("[[0,1,1,1]]" );
89- else {
94+ else if ( extra_ranks == 0 ) {
9095 /* First attempt to get flux.taskmap, falling back to
9196 * PMI_process_mapping if this key is not available.
9297 */
@@ -164,7 +169,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
164169 return -1 ;
165170}
166171
167- static int set_hostlist_attr (attr_t * attrs , struct hostlist * hl )
172+ static int set_hostlist_attr (attr_t * attrs ,struct hostlist * hl )
168173{
169174 const char * value ;
170175 char * s ;
@@ -220,9 +225,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
220225 int child_count ;
221226 int * child_ranks = NULL ;
222227 const char * uri ;
223- int i ;
224228 int upmi_flags = UPMI_LIBPMI_NOFLUX ;
225229 const char * upmi_method ;
230+ const char * s ;
231+ int size ;
226232
227233 // N.B. overlay_create() sets the tbon.topo attribute
228234 if (attr_get (attrs , "tbon.topo" , & topo_uri , NULL ) < 0 ) {
@@ -249,11 +255,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
249255 log_err ("set_instance_level_attr" );
250256 goto error ;
251257 }
252- if (set_broker_mapping_attr (upmi , info .size , attrs ) < 0 ) {
258+ /* Allow the PMI size to be overridden with a larger one so that
259+ * additional ranks can be grafted on later.
260+ */
261+ size = info .size ;
262+ if (attr_get (attrs , "size" , & s , NULL ) == 0 ) {
263+ errno = 0 ;
264+ size = strtoul (s , NULL , 10 );
265+ if (errno != 0 || size <= info .size ) {
266+ log_msg ("instance size may only be increased" );
267+ goto error ;
268+ }
269+ }
270+ if (set_broker_mapping_attr (upmi , size , attrs , size - info .size ) < 0 ) {
253271 log_err ("error setting broker.mapping attribute" );
254272 goto error ;
255273 }
256- if (!(topo = topology_create (topo_uri , info . size , & error ))) {
274+ if (!(topo = topology_create (topo_uri , size , & error ))) {
257275 log_msg ("error creating '%s' topology: %s" , topo_uri , error .text );
258276 goto error ;
259277 }
@@ -269,16 +287,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
269287 goto error ;
270288 }
271289
272- /* A size=1 instance has no peers, so skip the PMI exchange.
273- */
274- if (info .size == 1 ) {
275- if (hostlist_append (hl , hostname ) < 0 ) {
276- log_err ("hostlist_append" );
277- goto error ;
278- }
279- goto done ;
280- }
281-
282290 /* Enable ipv6 for maximum flexibility in address selection.
283291 */
284292 overlay_set_ipv6 (overlay , 1 );
@@ -310,6 +318,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
310318 goto error ;
311319 }
312320
321+ /* If the PMI size is 1, then skip the PMI exchange entirely.
322+ */
323+ if (info .size == 1 ) {
324+ if (hostlist_append (hl , hostname ) < 0 ) {
325+ log_err ("hostlist_append" );
326+ goto error ;
327+ }
328+ goto done ;
329+ }
330+
313331 /* Each broker writes a "business card" consisting of hostname,
314332 * public key, and URI (empty string for leaf node).
315333 */
@@ -381,10 +399,13 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
381399
382400 /* Fetch the business card of children and inform overlay of public keys.
383401 */
384- for (i = 0 ; i < child_count ; i ++ ) {
402+ for (int i = 0 ; i < child_count ; i ++ ) {
385403 const char * peer_pubkey ;
386404 int child_rank = child_ranks [i ];
387405
406+ if (child_rank >= info .size )
407+ break ;
408+
388409 if (snprintf (key , sizeof (key ), "%d" , child_rank ) >= sizeof (key )) {
389410 log_msg ("pmi key string overflow" );
390411 goto error ;
@@ -413,7 +434,7 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
413434 /* Fetch the business card of all ranks and build hostlist.
414435 * The hostlist is built independently (and in parallel) on all ranks.
415436 */
416- for (i = 0 ; i < info .size ; i ++ ) {
437+ for (int i = 0 ; i < info .size ; i ++ ) {
417438 const char * peer_hostname ;
418439
419440 if (snprintf (key , sizeof (key ), "%d" , i ) >= sizeof (key )) {
@@ -450,6 +471,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
450471 }
451472
452473done :
474+ /* If the instance size is greater than the PMI size, add placeholder
475+ * names to the hostlist for the ranks that haven't joined yet.
476+ */
477+ for (int i = info .size ; i < size ; i ++ ) {
478+ char buf [64 ];
479+ snprintf (buf , sizeof (buf ), "extra%d" , i - info .size );
480+ if (hostlist_append (hl , buf ) < 0 ) {
481+ log_err ("hostlist_append" );
482+ goto error ;
483+ }
484+ }
453485 if (set_hostlist_attr (attrs , hl ) < 0 ) {
454486 log_err ("setattr hostlist" );
455487 goto error ;
0 commit comments