Skip to content

Commit c03df33

Browse files
committed
broker: add flub RPC methods to overlay
Problem: the flub bootstrap method requires broker services. Add the following services (instance owner only): overlay.flub-getinfo (rank 0 only) Allocate an unused rank from rank 0 and also return size and misc. broker attributes to be set in the new broker overlay.flub-kex (peer rank) Exchange public keys with the TBON parent and obtain its zeromq URI. Add overlay_flub_provision() which is called by boot_pmi.c when extra ranks are configured, making those ranks available for allocation.
1 parent 35ac794 commit c03df33

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

src/broker/boot_pmi.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
278278
if (topology_set_rank (topo, info.rank) < 0
279279
|| overlay_set_topology (overlay, topo) < 0)
280280
goto error;
281+
if (info.rank == 0 && size > info.size) {
282+
if (overlay_flub_provision (overlay, info.size, size - 1, true) < 0) {
283+
log_msg ("error provisioning flub allocator");
284+
goto error;
285+
}
286+
}
281287
if (gethostname (hostname, sizeof (hostname)) < 0) {
282288
log_err ("gethostname");
283289
goto error;

src/broker/overlay.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "src/common/libutil/errprintf.h"
3939
#include "src/common/librouter/rpc_track.h"
4040
#include "ccan/str/str.h"
41+
#include "ccan/array_size/array_size.h"
4142

4243
#include "overlay.h"
4344
#include "attr.h"
@@ -192,6 +193,7 @@ struct overlay {
192193

193194
struct flux_msglist *health_requests;
194195
struct flux_msglist *trace_requests;
196+
struct idset *flub_rankpool;
195197
};
196198

197199
static void overlay_mcast_child (struct overlay *ov, flux_msg_t *msg);
@@ -297,6 +299,10 @@ int overlay_set_topology (struct overlay *ov, struct topology *topo)
297299

298300
ov->size = topology_get_size (topo);
299301
ov->rank = topology_get_rank (topo);
302+
if (ov->rank == 0) {
303+
if (!(ov->flub_rankpool = idset_create (ov->size, 0)))
304+
goto error;
305+
}
300306
if (!cert_meta_get (ov->cert, "name")) {
301307
char val[16];
302308
snprintf (val, sizeof (val), "%lu", (unsigned long)ov->rank);
@@ -2326,11 +2332,142 @@ static int overlay_configure_topo (struct overlay *ov)
23262332
return 0;
23272333
}
23282334

2335+
static int overlay_flub_alloc (struct overlay *ov, int *rank)
2336+
{
2337+
unsigned int id;
2338+
2339+
if (!ov->flub_rankpool) { // created by overlay_set_topology()
2340+
errno = EINVAL;
2341+
return -1;
2342+
}
2343+
if ((id = idset_first (ov->flub_rankpool)) != IDSET_INVALID_ID) {
2344+
if (idset_clear (ov->flub_rankpool, id) < 0)
2345+
return -1;
2346+
*rank = (int)id;
2347+
return 0;
2348+
}
2349+
errno = ENOENT;
2350+
return -1;
2351+
}
2352+
2353+
int overlay_flub_provision (struct overlay *ov,
2354+
uint32_t lo_rank,
2355+
uint32_t hi_rank,
2356+
bool available)
2357+
{
2358+
if (!ov->flub_rankpool) { // created by overlay_set_topology()
2359+
errno = EINVAL;
2360+
return -1;
2361+
}
2362+
if (available)
2363+
return idset_range_set (ov->flub_rankpool, lo_rank, hi_rank);
2364+
return idset_range_clear (ov->flub_rankpool, lo_rank, hi_rank);
2365+
}
2366+
2367+
static json_t *flub_dict_create (attr_t *attrs)
2368+
{
2369+
const char *names[] = { "hostlist", "instance-level" };
2370+
json_t *dict;
2371+
2372+
if (!(dict = json_object ()))
2373+
goto nomem;
2374+
for (int i = 0; i < ARRAY_SIZE (names); i++) {
2375+
const char *val;
2376+
json_t *o;
2377+
if (attr_get (attrs, names[i], &val, NULL) < 0)
2378+
goto error;
2379+
if (!(o = json_string (val))
2380+
|| json_object_set_new (dict, names[i], o) < 0) {
2381+
json_decref (o);
2382+
goto nomem;
2383+
}
2384+
}
2385+
return dict;
2386+
nomem:
2387+
errno = ENOMEM;
2388+
error:
2389+
ERRNO_SAFE_WRAP (json_decref, dict);
2390+
return NULL;
2391+
}
2392+
2393+
static void overlay_flub_getinfo_cb (flux_t *h,
2394+
flux_msg_handler_t *mh,
2395+
const flux_msg_t *msg,
2396+
void *arg)
2397+
{
2398+
struct overlay *ov = arg;
2399+
const char *errmsg = NULL;
2400+
json_t *attrs = NULL;
2401+
int rank;
2402+
2403+
if (flux_request_unpack (msg, NULL, "{}") < 0)
2404+
goto error;
2405+
if (overlay_flub_alloc (ov, &rank) < 0) {
2406+
errmsg = "there are no available ranks";
2407+
goto error;
2408+
}
2409+
if (!(attrs = flub_dict_create (ov->attrs)))
2410+
goto error;
2411+
if (flux_respond_pack (h,
2412+
msg,
2413+
"{s:i s:i s:O}",
2414+
"rank", rank,
2415+
"size", ov->size,
2416+
"attrs", attrs) < 0)
2417+
flux_log_error (h, "error responding to overlay.flub-getinfo request");
2418+
json_decref (attrs);
2419+
return;
2420+
error:
2421+
if (flux_respond_error (h, msg, errno, errmsg) < 0)
2422+
flux_log_error (h, "error responding to overlay.flub-getinfo request");
2423+
json_decref (attrs);
2424+
}
2425+
2426+
static void overlay_flub_kex_cb (flux_t *h,
2427+
flux_msg_handler_t *mh,
2428+
const flux_msg_t *msg,
2429+
void *arg)
2430+
{
2431+
struct overlay *ov = arg;
2432+
const char *errmsg = NULL;
2433+
const char *name;
2434+
const char *pubkey;
2435+
2436+
if (flux_request_unpack (msg,
2437+
NULL,
2438+
"{s:s s:s}",
2439+
"name", &name,
2440+
"pubkey", &pubkey) < 0)
2441+
goto error;
2442+
if (ov->child_count == 0) {
2443+
errmsg = "this broker cannot have children";
2444+
errno = EINVAL;
2445+
goto error;
2446+
}
2447+
if (overlay_authorize (ov, name, pubkey) < 0) {
2448+
errmsg = "failed to authorize public key";
2449+
goto error;
2450+
}
2451+
if (flux_respond_pack (h,
2452+
msg,
2453+
"{s:s s:s s:s}",
2454+
"pubkey", overlay_cert_pubkey (ov),
2455+
"name", overlay_cert_name (ov),
2456+
"uri", overlay_get_bind_uri (ov)) < 0)
2457+
flux_log_error (h, "error responding to overlay.flub-kex request");
2458+
return;
2459+
error:
2460+
if (flux_respond_error (h, msg, errno, errmsg) < 0)
2461+
flux_log_error (h, "error responding to overlay.flub-kex request");
2462+
}
2463+
23292464
void overlay_destroy (struct overlay *ov)
23302465
{
23312466
if (ov) {
23322467
int saved_errno = errno;
23332468

2469+
idset_destroy (ov->flub_rankpool);
2470+
23342471
flux_msglist_destroy (ov->health_requests);
23352472

23362473
cert_destroy (ov->cert);
@@ -2384,6 +2521,18 @@ static const struct flux_msg_handler_spec htab[] = {
23842521
overlay_trace_cb,
23852522
0,
23862523
},
2524+
{
2525+
FLUX_MSGTYPE_REQUEST,
2526+
"overlay.flub-kex",
2527+
overlay_flub_kex_cb,
2528+
0,
2529+
},
2530+
{
2531+
FLUX_MSGTYPE_REQUEST,
2532+
"overlay.flub-getinfo",
2533+
overlay_flub_getinfo_cb,
2534+
0,
2535+
},
23872536
{
23882537
FLUX_MSGTYPE_REQUEST,
23892538
"overlay.stats-get",

src/broker/overlay.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ int overlay_set_monitor_cb (struct overlay *ov,
145145
overlay_monitor_f cb,
146146
void *arg);
147147

148+
/* Make a range of ranks available/unavailable for flub bootstrap
149+
*/
150+
int overlay_flub_provision (struct overlay *ov,
151+
uint32_t lo_rank,
152+
uint32_t hi_rank,
153+
bool available);
154+
148155
/* Register overlay-related broker attributes.
149156
*/
150157
int overlay_register_attrs (struct overlay *overlay);

0 commit comments

Comments
 (0)