Skip to content

Commit 99a107b

Browse files
committed
broker: add flub RPC methods to overlay
Problem: the flub bootstrap method requires broker services.
Add the following services (instance owner only):
- overlay.flub-getinfo (rank 0 only): allocate an unused rank from rank 0 and also return the size and miscellaneous broker attributes to be set in the new broker.
- overlay.flub-kex (peer rank): exchange public keys with the TBON parent and obtain its zeromq URI.
Add overlay_flub_provision(), which is called by boot_pmi.c when extra ranks are configured, making those ranks available for allocation.
1 parent 0d184e5 commit 99a107b

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

src/broker/boot_pmi.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,12 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
277277
if (topology_set_rank (topo, info.rank) < 0
278278
|| overlay_set_topology (overlay, topo) < 0)
279279
goto error;
280+
if (info.rank == 0 && size > info.size) {
281+
if (overlay_flub_provision (overlay, info.size, size - 1, true) < 0) {
282+
log_msg ("error provisioning flub allocator");
283+
goto error;
284+
}
285+
}
280286
if (gethostname (hostname, sizeof (hostname)) < 0) {
281287
log_err ("gethostname");
282288
goto error;

src/broker/overlay.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "src/common/libutil/errprintf.h"
3939
#include "src/common/librouter/rpc_track.h"
4040
#include "ccan/str/str.h"
41+
#include "ccan/array_size/array_size.h"
4142

4243
#include "overlay.h"
4344
#include "attr.h"
@@ -185,6 +186,7 @@ struct overlay {
185186
void *recv_arg;
186187

187188
struct flux_msglist *health_requests;
189+
struct idset *flub_rankpool;
188190
};
189191

190192
static void overlay_mcast_child (struct overlay *ov, flux_msg_t *msg);
@@ -286,6 +288,10 @@ int overlay_set_topology (struct overlay *ov, struct topology *topo)
286288

287289
ov->size = topology_get_size (topo);
288290
ov->rank = topology_get_rank (topo);
291+
if (ov->rank == 0) {
292+
if (!(ov->flub_rankpool = idset_create (ov->size, 0)))
293+
goto error;
294+
}
289295
if (!cert_meta_get (ov->cert, "name")) {
290296
char val[16];
291297
snprintf (val, sizeof (val), "%lu", (unsigned long)ov->rank);
@@ -2073,11 +2079,142 @@ static int overlay_configure_topo (struct overlay *ov)
20732079
return 0;
20742080
}
20752081

2082+
static int overlay_flub_alloc (struct overlay *ov, int *rank)
2083+
{
2084+
unsigned int id;
2085+
2086+
if (!ov->flub_rankpool) { // created by overlay_set_topology()
2087+
errno = EINVAL;
2088+
return -1;
2089+
}
2090+
if ((id = idset_first (ov->flub_rankpool)) != IDSET_INVALID_ID) {
2091+
if (idset_clear (ov->flub_rankpool, id) < 0)
2092+
return -1;
2093+
*rank = (int)id;
2094+
return 0;
2095+
}
2096+
errno = ENOENT;
2097+
return -1;
2098+
}
2099+
2100+
int overlay_flub_provision (struct overlay *ov,
2101+
uint32_t lo_rank,
2102+
uint32_t hi_rank,
2103+
bool available)
2104+
{
2105+
if (!ov->flub_rankpool) { // created by overlay_set_topology()
2106+
errno = EINVAL;
2107+
return -1;
2108+
}
2109+
if (available)
2110+
return idset_range_set (ov->flub_rankpool, lo_rank, hi_rank);
2111+
return idset_range_clear (ov->flub_rankpool, lo_rank, hi_rank);
2112+
}
2113+
2114+
/* Build a JSON object that maps each attribute name listed in 'names'
 * ("hostlist", "instance-level") to its current string value in 'attrs'.
 * Returns a new object that the caller must json_decref(), or NULL on
 * failure.  errno is ENOMEM if a JSON object could not be created or
 * populated; if attr_get() fails, errno is left as attr_get() set it.
 */
static json_t *flub_dict_create (attr_t *attrs)
{
    const char *names[] = { "hostlist", "instance-level" };
    json_t *dict;

    if (!(dict = json_object ()))
        goto nomem;
    for (size_t i = 0; i < ARRAY_SIZE (names); i++) {
        const char *val;
        json_t *o;

        if (attr_get (attrs, names[i], &val, NULL) < 0)
            goto error;
        if (!(o = json_string (val)))
            goto nomem;
        /* json_object_set_new() steals the reference to 'o' and releases
         * it itself on failure, so 'o' must not be decref'd again here.
         */
        if (json_object_set_new (dict, names[i], o) < 0)
            goto nomem;
    }
    return dict;
nomem:
    errno = ENOMEM;
error:
    /* dict may be NULL here; json_decref (NULL) is a no-op */
    ERRNO_SAFE_WRAP (json_decref, dict);
    return NULL;
}
2139+
2140+
static void overlay_flub_getinfo_cb (flux_t *h,
2141+
flux_msg_handler_t *mh,
2142+
const flux_msg_t *msg,
2143+
void *arg)
2144+
{
2145+
struct overlay *ov = arg;
2146+
const char *errmsg = NULL;
2147+
json_t *attrs = NULL;
2148+
int rank;
2149+
2150+
if (flux_request_unpack (msg, NULL, "{}") < 0)
2151+
goto error;
2152+
if (overlay_flub_alloc (ov, &rank) < 0) {
2153+
errmsg = "there are no available ranks";
2154+
goto error;
2155+
}
2156+
if (!(attrs = flub_dict_create (ov->attrs)))
2157+
goto error;
2158+
if (flux_respond_pack (h,
2159+
msg,
2160+
"{s:i s:i s:O}",
2161+
"rank", rank,
2162+
"size", ov->size,
2163+
"attrs", attrs) < 0)
2164+
flux_log_error (h, "error responding to overlay.flub-getinfo request");
2165+
json_decref (attrs);
2166+
return;
2167+
error:
2168+
if (flux_respond_error (h, msg, errno, errmsg) < 0)
2169+
flux_log_error (h, "error responding to overlay.flub-getinfo request");
2170+
json_decref (attrs);
2171+
}
2172+
2173+
static void overlay_flub_kex_cb (flux_t *h,
2174+
flux_msg_handler_t *mh,
2175+
const flux_msg_t *msg,
2176+
void *arg)
2177+
{
2178+
struct overlay *ov = arg;
2179+
const char *errmsg = NULL;
2180+
const char *name;
2181+
const char *pubkey;
2182+
2183+
if (flux_request_unpack (msg,
2184+
NULL,
2185+
"{s:s s:s}",
2186+
"name", &name,
2187+
"pubkey", &pubkey) < 0)
2188+
goto error;
2189+
if (ov->child_count == 0) {
2190+
errmsg = "this broker cannot have children";
2191+
errno = EINVAL;
2192+
goto error;
2193+
}
2194+
if (overlay_authorize (ov, name, pubkey) < 0) {
2195+
errmsg = "failed to authorize public key";
2196+
goto error;
2197+
}
2198+
if (flux_respond_pack (h,
2199+
msg,
2200+
"{s:s s:s s:s}",
2201+
"pubkey", overlay_cert_pubkey (ov),
2202+
"name", overlay_cert_name (ov),
2203+
"uri", overlay_get_bind_uri (ov)) < 0)
2204+
flux_log_error (h, "error responding to overlay.flub-kex request");
2205+
return;
2206+
error:
2207+
if (flux_respond_error (h, msg, errno, errmsg) < 0)
2208+
flux_log_error (h, "error responding to overlay.flub-kex request");
2209+
}
2210+
20762211
void overlay_destroy (struct overlay *ov)
20772212
{
20782213
if (ov) {
20792214
int saved_errno = errno;
20802215

2216+
idset_destroy (ov->flub_rankpool);
2217+
20812218
flux_msglist_destroy (ov->health_requests);
20822219

20832220
cert_destroy (ov->cert);
@@ -2123,6 +2260,18 @@ void overlay_destroy (struct overlay *ov)
21232260
}
21242261

21252262
static const struct flux_msg_handler_spec htab[] = {
2263+
{
2264+
FLUX_MSGTYPE_REQUEST,
2265+
"overlay.flub-kex",
2266+
overlay_flub_kex_cb,
2267+
0,
2268+
},
2269+
{
2270+
FLUX_MSGTYPE_REQUEST,
2271+
"overlay.flub-getinfo",
2272+
overlay_flub_getinfo_cb,
2273+
0,
2274+
},
21262275
{
21272276
FLUX_MSGTYPE_REQUEST,
21282277
"overlay.stats-get",

src/broker/overlay.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ int overlay_set_monitor_cb (struct overlay *ov,
145145
overlay_monitor_f cb,
146146
void *arg);
147147

148+
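/* Make the inclusive range of ranks lo_rank through hi_rank available
 * (available=true) or unavailable (available=false) for flub bootstrap.
 * Returns 0 on success, -1 on failure.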
149+
*/
150+
int overlay_flub_provision (struct overlay *ov,
151+
uint32_t lo_rank,
152+
uint32_t hi_rank,
153+
bool available);
154+
148155
/* Register overlay-related broker attributes.
149156
*/
150157
int overlay_register_attrs (struct overlay *overlay);

0 commit comments

Comments
 (0)