@@ -75,17 +75,22 @@ static char *pmi_mapping_to_taskmap (const char *s)
7575}
7676
7777/* Set broker.mapping attribute from enclosing instance taskmap.
78+ * Skip setting the taskmap if extra_ranks is nonzero since the
79+ * mapping of those ranks is unknown. N.B. when broker.mapping is missing,
80+ * use_ipc() below will return false, a good thing since it is unknown
81+ * whether ipc:// would work for the TBON wire-up of extra nodes.
7882 */
7983static int set_broker_mapping_attr (struct upmi * upmi ,
8084 int size ,
81- attr_t * attrs )
85+ attr_t * attrs ,
86+ int extra_ranks )
8287{
8388 char * val = NULL ;
8489 int rc ;
8590
8691 if (size == 1 )
8792 val = strdup ("[[0,1,1,1]]" );
88- else {
93+ else if ( extra_ranks == 0 ) {
8994 /* First attempt to get flux.taskmap, falling back to
9095 * PMI_process_mapping if this key is not available.
9196 */
@@ -163,7 +168,7 @@ static int format_bind_uri (char *buf, int bufsz, attr_t *attrs, int rank)
163168 return -1 ;
164169}
165170
166- static int set_hostlist_attr (attr_t * attrs , struct hostlist * hl )
171+ static int set_hostlist_attr (attr_t * attrs ,struct hostlist * hl )
167172{
168173 const char * value ;
169174 char * s ;
@@ -220,9 +225,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
220225 int child_count ;
221226 int * child_ranks = NULL ;
222227 const char * uri ;
223- int i ;
224228 int upmi_flags = UPMI_LIBPMI_NOFLUX ;
225229 const char * upmi_method ;
230+ const char * s ;
231+ int size ;
226232
227233 // N.B. overlay_create() sets the tbon.topo attribute
228234 if (attr_get (attrs , "tbon.topo" , & topo_uri , NULL ) < 0 ) {
@@ -249,11 +255,23 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
249255 log_err ("set_instance_level_attr" );
250256 goto error ;
251257 }
252- if (set_broker_mapping_attr (upmi , info .size , attrs ) < 0 ) {
258+ /* Allow the PMI size to be overridden with a larger one so that
259+ * additional ranks can be grafted on later.
260+ */
261+ size = info .size ;
262+ if (attr_get (attrs , "size" , & s , NULL ) == 0 ) {
263+ errno = 0 ;
264+ size = strtoul (s , NULL , 10 );
265+ if (errno != 0 || size <= info .size ) {
266+ log_msg ("instance size may only be increased" );
267+ goto error ;
268+ }
269+ }
270+ if (set_broker_mapping_attr (upmi , size , attrs , size - info .size ) < 0 ) {
253271 log_err ("error setting broker.mapping attribute" );
254272 goto error ;
255273 }
256- if (!(topo = topology_create (topo_uri , info . size , & error ))) {
274+ if (!(topo = topology_create (topo_uri , size , & error ))) {
257275 log_msg ("error creating '%s' topology: %s" , topo_uri , error .text );
258276 goto error ;
259277 }
@@ -269,16 +287,6 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
269287 goto error ;
270288 }
271289
272- /* A size=1 instance has no peers, so skip the PMI exchange.
273- */
274- if (info .size == 1 ) {
275- if (hostlist_append (hl , hostname ) < 0 ) {
276- log_err ("hostlist_append" );
277- goto error ;
278- }
279- goto done ;
280- }
281-
282290 /* Enable ipv6 for maximum flexibility in address selection.
283291 */
284292 overlay_set_ipv6 (overlay , 1 );
@@ -310,6 +318,16 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
310318 goto error ;
311319 }
312320
321+ /* If the PMI size is 1, then skip the PMI exchange entirely.
322+ */
323+ if (info .size == 1 ) {
324+ if (hostlist_append (hl , hostname ) < 0 ) {
325+ log_err ("hostlist_append" );
326+ goto error ;
327+ }
328+ goto done ;
329+ }
330+
313331 /* Each broker writes a "business card" consisting of hostname,
314332 * public key, and URI (empty string for leaf node).
315333 */
@@ -390,10 +408,13 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
390408
391409 /* Fetch the business card of children and inform overlay of public keys.
392410 */
393- for (i = 0 ; i < child_count ; i ++ ) {
411+ for (int i = 0 ; i < child_count ; i ++ ) {
394412 const char * peer_pubkey ;
395413 int child_rank = child_ranks [i ];
396414
415+ if (child_rank >= info .size )
416+ break ;
417+
397418 if (snprintf (key , sizeof (key ), "%d" , child_rank ) >= sizeof (key )) {
398419 log_msg ("pmi key string overflow" );
399420 goto error ;
@@ -428,7 +449,7 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
428449 /* Fetch the business card of all ranks and build hostlist.
429450 * The hostlist is built independently (and in parallel) on all ranks.
430451 */
431- for (i = 0 ; i < info .size ; i ++ ) {
452+ for (int i = 0 ; i < info .size ; i ++ ) {
432453 const char * peer_hostname ;
433454
434455 if (snprintf (key , sizeof (key ), "%d" , i ) >= sizeof (key )) {
@@ -471,6 +492,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
471492 }
472493
473494done :
495+ /* If the instance size is greater than the PMI size, add placeholder
496+ * names to the hostlist for the ranks that haven't joined yet.
497+ */
498+ for (int i = info .size ; i < size ; i ++ ) {
499+ char buf [64 ];
500+ snprintf (buf , sizeof (buf ), "extra%d" , i - info .size );
501+ if (hostlist_append (hl , buf ) < 0 ) {
502+ log_err ("hostlist_append" );
503+ goto error ;
504+ }
505+ }
474506 if (set_hostlist_attr (attrs , hl ) < 0 ) {
475507 log_err ("setattr hostlist" );
476508 goto error ;
0 commit comments