Skip to content

Commit bbd6fb3

Browse files
committed
broker: add flub RPC methods to overlay
Problem: the flub bootstrap method requires broker services. Add the following services (instance owner only): overlay.flub-getinfo (rank 0 only) Allocate an unused rank from rank 0 and also return size and misc. broker attributes to be set in the new broker overlay.flub-kex (peer rank) Exchange public keys with the TBON parent and obtain its zeromq URI. Add overlay_flub_provision() which is called by boot_pmi.c when extra ranks are configured, making those ranks available for allocation.
1 parent 6476828 commit bbd6fb3

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

src/broker/boot_pmi.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
278278
if (topology_set_rank (topo, info.rank) < 0
279279
|| overlay_set_topology (overlay, topo) < 0)
280280
goto error;
281+
if (info.rank == 0 && size > info.size) {
282+
if (overlay_flub_provision (overlay, info.size, size - 1, true) < 0) {
283+
log_msg ("error provisioning flub allocator");
284+
goto error;
285+
}
286+
}
281287
if (gethostname (hostname, sizeof (hostname)) < 0) {
282288
log_err ("gethostname");
283289
goto error;

src/broker/overlay.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "src/common/libutil/errprintf.h"
3333
#include "src/common/librouter/rpc_track.h"
3434
#include "src/common/libccan/ccan/ptrint/ptrint.h"
35+
#include "ccan/array_size/array_size.h"
3536

3637
#include "overlay.h"
3738
#include "attr.h"
@@ -177,6 +178,7 @@ struct overlay {
177178
void *recv_arg;
178179

179180
struct flux_msglist *health_requests;
181+
struct idset *flub_rankpool;
180182
};
181183

182184
static void overlay_mcast_child (struct overlay *ov, const flux_msg_t *msg);
@@ -278,6 +280,10 @@ int overlay_set_topology (struct overlay *ov, struct topology *topo)
278280

279281
ov->size = topology_get_size (topo);
280282
ov->rank = topology_get_rank (topo);
283+
if (ov->rank == 0) {
284+
if (!(ov->flub_rankpool = idset_create (ov->size, 0)))
285+
goto error;
286+
}
281287
if (!zcert_meta (ov->cert, "name")) {
282288
char buf[16];
283289
snprintf (buf, sizeof (buf), "%lu", (unsigned long)ov->rank);
@@ -2025,11 +2031,142 @@ static int overlay_configure_topo (struct overlay *ov)
20252031
return 0;
20262032
}
20272033

2034+
static int overlay_flub_alloc (struct overlay *ov, int *rank)
2035+
{
2036+
unsigned int id;
2037+
2038+
if (!ov->flub_rankpool) { // created by overlay_set_topology()
2039+
errno = EINVAL;
2040+
return -1;
2041+
}
2042+
if ((id = idset_first (ov->flub_rankpool)) != IDSET_INVALID_ID) {
2043+
if (idset_clear (ov->flub_rankpool, id) < 0)
2044+
return -1;
2045+
*rank = (int)id;
2046+
return 0;
2047+
}
2048+
errno = ENOENT;
2049+
return -1;
2050+
}
2051+
2052+
int overlay_flub_provision (struct overlay *ov,
2053+
uint32_t lo_rank,
2054+
uint32_t hi_rank,
2055+
bool available)
2056+
{
2057+
if (!ov->flub_rankpool) { // created by overlay_set_topology()
2058+
errno = EINVAL;
2059+
return -1;
2060+
}
2061+
if (available)
2062+
return idset_range_set (ov->flub_rankpool, lo_rank, hi_rank);
2063+
return idset_range_clear (ov->flub_rankpool, lo_rank, hi_rank);
2064+
}
2065+
2066+
static json_t *flub_dict_create (attr_t *attrs)
2067+
{
2068+
const char *names[] = { "hostlist", "instance-level" };
2069+
json_t *dict;
2070+
2071+
if (!(dict = json_object ()))
2072+
goto nomem;
2073+
for (int i = 0; i < ARRAY_SIZE (names); i++) {
2074+
const char *val;
2075+
json_t *o;
2076+
if (attr_get (attrs, names[i], &val, NULL) < 0)
2077+
goto error;
2078+
if (!(o = json_string (val))
2079+
|| json_object_set_new (dict, names[i], o) < 0) {
2080+
json_decref (o);
2081+
goto nomem;
2082+
}
2083+
}
2084+
return dict;
2085+
nomem:
2086+
errno = ENOMEM;
2087+
error:
2088+
ERRNO_SAFE_WRAP (json_decref, dict);
2089+
return NULL;
2090+
}
2091+
2092+
static void overlay_flub_getinfo_cb (flux_t *h,
2093+
flux_msg_handler_t *mh,
2094+
const flux_msg_t *msg,
2095+
void *arg)
2096+
{
2097+
struct overlay *ov = arg;
2098+
const char *errmsg = NULL;
2099+
json_t *attrs = NULL;
2100+
int rank;
2101+
2102+
if (flux_request_unpack (msg, NULL, "{}") < 0)
2103+
goto error;
2104+
if (overlay_flub_alloc (ov, &rank) < 0) {
2105+
errmsg = "there are no available ranks";
2106+
goto error;
2107+
}
2108+
if (!(attrs = flub_dict_create (ov->attrs)))
2109+
goto error;
2110+
if (flux_respond_pack (h,
2111+
msg,
2112+
"{s:i s:i s:O}",
2113+
"rank", rank,
2114+
"size", ov->size,
2115+
"attrs", attrs) < 0)
2116+
flux_log_error (h, "error responding to overlay.flub-getinfo request");
2117+
json_decref (attrs);
2118+
return;
2119+
error:
2120+
if (flux_respond_error (h, msg, errno, errmsg) < 0)
2121+
flux_log_error (h, "error responding to overlay.flub-getinfo request");
2122+
json_decref (attrs);
2123+
}
2124+
2125+
static void overlay_flub_kex_cb (flux_t *h,
2126+
flux_msg_handler_t *mh,
2127+
const flux_msg_t *msg,
2128+
void *arg)
2129+
{
2130+
struct overlay *ov = arg;
2131+
const char *errmsg = NULL;
2132+
const char *name;
2133+
const char *pubkey;
2134+
2135+
if (flux_request_unpack (msg,
2136+
NULL,
2137+
"{s:s s:s}",
2138+
"name", &name,
2139+
"pubkey", &pubkey) < 0)
2140+
goto error;
2141+
if (ov->child_count == 0) {
2142+
errmsg = "this broker cannot have children";
2143+
errno = EINVAL;
2144+
goto error;
2145+
}
2146+
if (overlay_authorize (ov, name, pubkey) < 0) {
2147+
errmsg = "failed to authorize public key";
2148+
goto error;
2149+
}
2150+
if (flux_respond_pack (h,
2151+
msg,
2152+
"{s:s s:s s:s}",
2153+
"pubkey", overlay_cert_pubkey (ov),
2154+
"name", overlay_cert_name (ov),
2155+
"uri", overlay_get_bind_uri (ov)) < 0)
2156+
flux_log_error (h, "error responding to overlay.flub-kex request");
2157+
return;
2158+
error:
2159+
if (flux_respond_error (h, msg, errno, errmsg) < 0)
2160+
flux_log_error (h, "error responding to overlay.flub-kex request");
2161+
}
2162+
20282163
void overlay_destroy (struct overlay *ov)
20292164
{
20302165
if (ov) {
20312166
int saved_errno = errno;
20322167

2168+
idset_destroy (ov->flub_rankpool);
2169+
20332170
flux_msglist_destroy (ov->health_requests);
20342171

20352172
zcert_destroy (&ov->cert);
@@ -2073,6 +2210,18 @@ void overlay_destroy (struct overlay *ov)
20732210
}
20742211

20752212
static const struct flux_msg_handler_spec htab[] = {
2213+
{
2214+
FLUX_MSGTYPE_REQUEST,
2215+
"overlay.flub-kex",
2216+
overlay_flub_kex_cb,
2217+
0,
2218+
},
2219+
{
2220+
FLUX_MSGTYPE_REQUEST,
2221+
"overlay.flub-getinfo",
2222+
overlay_flub_getinfo_cb,
2223+
0,
2224+
},
20762225
{
20772226
FLUX_MSGTYPE_REQUEST,
20782227
"overlay.stats-get",

src/broker/overlay.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,13 @@ int overlay_set_monitor_cb (struct overlay *ov,
138138
overlay_monitor_f cb,
139139
void *arg);
140140

141+
/* Make a range of ranks available/unavailable for flub bootstrap
142+
*/
143+
int overlay_flub_provision (struct overlay *ov,
144+
uint32_t lo_rank,
145+
uint32_t hi_rank,
146+
bool available);
147+
141148
/* Register overlay-related broker attributes.
142149
*/
143150
int overlay_register_attrs (struct overlay *overlay);

0 commit comments

Comments
 (0)