Skip to content

Commit 35ac794

Browse files
committed
broker: add flub bootstrap method
Problem: there is no way to add brokers to an instance that has extra slots available. Add support for FLUB, the FLUx Bootstrap protocol, used when the broker is started with broker.boot-server=<uri> The bootstrap protocol consists of two RPCs: 1) overlay.flub-getinfo, which requests the allocation of an available rank from rank 0 of the instance that is being extended, and also retrieves the instance size and some broker attributes. 2) overlay.flub-kex, which exchanges public keys with the new rank's TBON parent and obtains the parent's TBON URI. Assumptions: - all ranks have the same topology configuration Limitations (for now): - hostnames will be logged as extra[0-N] - a broker rank cannot be re-allocated to a new broker - a broker cannot replace one that failed in a regular instance - dummy resources for the max size of the instance must be configured
1 parent b6328d9 commit 35ac794

File tree

4 files changed

+256
-0
lines changed

4 files changed

+256
-0
lines changed

src/broker/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ libbroker_la_SOURCES = \
5353
boot_config.c \
5454
boot_pmi.h \
5555
boot_pmi.c \
56+
boot_flub.h \
57+
boot_flub.c \
5658
publisher.h \
5759
publisher.c \
5860
groups.h \

src/broker/boot_flub.c

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
/************************************************************\
2+
* Copyright 2023 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
/* boot_flub.c - FLUx Boot protocol
12+
*
13+
* Add a broker to an existing Flux instance.
14+
*/
15+
16+
#if HAVE_CONFIG_H
17+
#include "config.h"
18+
#endif
19+
#include <errno.h>
20+
#include <jansson.h>
21+
#include <flux/core.h>
22+
23+
#include "src/common/libidset/idset.h"
24+
#include "src/common/libutil/errprintf.h"
25+
#include "src/common/libutil/errno_safe.h"
26+
#include "src/common/libutil/ipaddr.h"
27+
#include "ccan/str/str.h"
28+
29+
#include "attr.h"
30+
#include "overlay.h"
31+
#include "topology.h"
32+
33+
#include "boot_flub.h"
34+
35+
struct boot_info {
36+
int size;
37+
int rank;
38+
json_t *attrs;
39+
};
40+
41+
struct boot_parent {
42+
char *pubkey;
43+
int rank;
44+
const char *uri;
45+
};
46+
47+
static int wait_for_group_membership (flux_t *h,
48+
const char *name,
49+
int rank,
50+
flux_error_t *error)
51+
{
52+
flux_future_t *f;
53+
bool online = false;
54+
55+
if (!(f = flux_rpc_pack (h,
56+
"groups.get",
57+
0,
58+
FLUX_RPC_STREAMING,
59+
"{s:s}",
60+
"name", "broker.online"))) {
61+
errprintf (error, "broker.online: %s", strerror (errno));
62+
return -1;
63+
}
64+
do {
65+
const char *members;
66+
struct idset *ids;
67+
68+
if (flux_rpc_get_unpack (f, "{s:s}", "members", &members) < 0) {
69+
errprintf (error, "broker.online: %s", future_strerror (f, errno));
70+
break;
71+
}
72+
if ((ids = idset_decode (members))
73+
&& idset_test (ids, rank))
74+
online = true;
75+
idset_destroy (ids);
76+
flux_future_reset (f);
77+
} while (!online);
78+
flux_future_destroy (f);
79+
80+
return online ? 0 : -1;
81+
}
82+
83+
static int set_attrs (attr_t *attrs, json_t *dict)
84+
{
85+
const char *key;
86+
json_t *val;
87+
88+
json_object_foreach (dict, key, val) {
89+
const char *s = json_string_value (val);
90+
if (!s) {
91+
errno = EPROTO;
92+
return -1;
93+
}
94+
if (attr_add (attrs, key, s, ATTR_IMMUTABLE) < 0)
95+
return -1;
96+
}
97+
return 0;
98+
}
99+
100+
int boot_flub (struct broker *ctx, flux_error_t *error)
101+
{
102+
const char *uri = NULL;
103+
flux_t *h;
104+
flux_future_t *f = NULL;
105+
flux_future_t *f2 = NULL;
106+
struct topology *topo = NULL;
107+
const char *topo_uri;
108+
struct boot_info info;
109+
struct boot_parent parent;
110+
const char *bind_uri = NULL;
111+
int rc = -1;
112+
113+
/* Ask a Flux instance to allocate an available rank.
114+
* N.B. the broker unsets FLUX_URI so either it is set on the
115+
* command line via broker.boot-server, or the compiled-in path
116+
* is used (system instance).
117+
*/
118+
(void)attr_get (ctx->attrs, "broker.boot-server", &uri, NULL);
119+
if (!(h = flux_open_ex (uri, 0, error)))
120+
return -1;
121+
if (!(f = flux_rpc_pack (h,
122+
"overlay.flub-getinfo",
123+
0,
124+
0,
125+
"{}"))
126+
|| flux_rpc_get_unpack (f,
127+
"{s:i s:i s:o}",
128+
"rank", &info.rank,
129+
"size", &info.size,
130+
"attrs", &info.attrs) < 0) {
131+
errprintf (error, "%s", future_strerror (f, errno));
132+
goto out;
133+
}
134+
/* Set instance attributes obtained from boot server.
135+
*/
136+
if (set_attrs (ctx->attrs, info.attrs) < 0) {
137+
errprintf (error, "error setting attributes: %s", strerror (errno));
138+
goto out;
139+
}
140+
/* Create topology. All ranks are assumed to have the same topology.
141+
* The tbon.topo attribute is set in overlay_create() if not provided
142+
* on the command line.
143+
*/
144+
if (attr_get (ctx->attrs, "tbon.topo", &topo_uri, NULL) < 0) {
145+
errprintf (error, "error fetching tbon.topo attribute");
146+
goto out;
147+
}
148+
if (!(topo = topology_create (topo_uri, info.size, NULL))
149+
|| topology_set_rank (topo, info.rank) < 0
150+
|| overlay_set_topology (ctx->overlay, topo) < 0) {
151+
errprintf (error, "error creating topology: %s", strerror (errno));
152+
goto out;
153+
}
154+
if ((parent.rank = topology_get_parent (topo)) < 0) {
155+
errprintf (error,
156+
"rank %d has no parent in %s topology",
157+
info.rank,
158+
topo_uri);
159+
goto out;
160+
}
161+
/* Exchange public keys with TBON parent and obtain its URI.
162+
*/
163+
if (wait_for_group_membership (h, "broker.online", parent.rank, error) < 0)
164+
goto out;
165+
if (!(f2 = flux_rpc_pack (h,
166+
"overlay.flub-kex",
167+
parent.rank,
168+
0,
169+
"{s:s s:s}",
170+
"name", overlay_cert_name (ctx->overlay),
171+
"pubkey", overlay_cert_pubkey (ctx->overlay)))
172+
|| flux_rpc_get_unpack (f2,
173+
"{s:s s:s}",
174+
"pubkey", &parent.pubkey,
175+
"uri", &parent.uri) < 0) {
176+
errprintf (error, "%s", future_strerror (f, errno));
177+
goto out;
178+
}
179+
/* Inform overlay subsystem of parent info.
180+
*/
181+
if (overlay_set_parent_uri (ctx->overlay, parent.uri) < 0
182+
|| overlay_set_parent_pubkey (ctx->overlay, parent.pubkey) < 0) {
183+
errprintf (error,
184+
"error setting up overlay parameters: %s",
185+
strerror (errno));
186+
goto out;
187+
}
188+
/* If there are children, bind to zmq socket and update tbon.endpoint.
189+
* Since we don't know if OUR children are co-located on the same node,
190+
* always use the tcp transport.
191+
*/
192+
if (topology_get_child_ranks (topo, NULL, 0) > 0) {
193+
char ipaddr[HOST_NAME_MAX + 1];
194+
char *wild = NULL;
195+
196+
overlay_set_ipv6 (ctx->overlay, 1);
197+
if (ipaddr_getprimary (ipaddr,
198+
sizeof (ipaddr),
199+
error->text,
200+
sizeof (error->text)) < 0)
201+
goto out;
202+
if (asprintf (&wild, "tcp://%s:*", ipaddr) < 0
203+
|| overlay_bind (ctx->overlay, wild) < 0) {
204+
errprintf (error, "error binding to tcp://%s:*", ipaddr);
205+
ERRNO_SAFE_WRAP (free, wild);
206+
goto out;
207+
}
208+
bind_uri = overlay_get_bind_uri (ctx->overlay);
209+
}
210+
if (attr_add (ctx->attrs, "tbon.endpoint", bind_uri, ATTR_IMMUTABLE) < 0) {
211+
errprintf (error, "setattr tbon.endpoint");
212+
goto out;
213+
}
214+
rc = 0;
215+
out:
216+
topology_decref (topo);
217+
flux_future_destroy (f);
218+
flux_future_destroy (f2);
219+
flux_close (h);
220+
return rc;
221+
}
222+
223+
// vi:ts=4 sw=4 expandtab

src/broker/boot_flub.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/************************************************************\
2+
* Copyright 2023 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
#ifndef BROKER_BOOT_FLUB_H
12+
#define BROKER_BOOT_FLUB_H
13+
14+
#include <flux/core.h>
15+
16+
#include "broker.h"
17+
18+
int boot_flub (struct broker *ctx, flux_error_t *error);
19+
20+
#endif /* BROKER_BOOT_FLUB_H */
21+
22+
// vi:ts=4 sw=4 expandtab

src/broker/broker.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
#include "runat.h"
7676
#include "heaptrace.h"
7777
#include "exec.h"
78+
#include "boot_flub.h"
7879
#include "boot_config.h"
7980
#include "boot_pmi.h"
8081
#include "publisher.h"
@@ -359,6 +360,8 @@ int main (int argc, char *argv[])
359360
if (attr_get (ctx.attrs, "broker.boot-method", &boot_method, NULL) < 0) {
360361
if (flux_conf_unpack (conf, NULL, "{s:{}}", "bootstrap") == 0)
361362
boot_method = "config";
363+
else if (attr_get (ctx.attrs, "broker.boot-server", NULL, NULL) == 0)
364+
boot_method = "flub";
362365
else
363366
boot_method = NULL;
364367
}
@@ -368,6 +371,12 @@ int main (int argc, char *argv[])
368371
goto cleanup;
369372
}
370373
}
374+
else if (boot_method && streq (boot_method, "flub")) {
375+
if (boot_flub (&ctx, &error) < 0) {
376+
log_msg ("boot-flub: %s", error.text);
377+
goto cleanup;
378+
}
379+
}
371380
else {
372381
if (boot_pmi (ctx.overlay, ctx.attrs) < 0) {
373382
log_msg ("boot-pmi failed");

0 commit comments

Comments
 (0)