Skip to content

Commit 9f628ee

Browse files
authored
Merge pull request #4566 from grondo/resource.config
resource: support configuration of resources in TOML config
2 parents 24d65ac + d72dd8f commit 9f628ee

File tree

11 files changed

+704
-149
lines changed

11 files changed

+704
-149
lines changed

doc/man5/flux-config-resource.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,36 @@ path
2424
passed from the enclosing Flux instance. The ``flux R`` utility may be
2525
used to generate this file.
2626

27+
config
28+
(optional) An array of resource config entries used as an alternative to
29+
an R object configured by ``resource.path``. Each array entry must contain
30+
a ``hosts`` key in RFC 29 Hostlist Format which configures the list of
31+
hosts to which the rest of the entry applies. The entry may also contain
32+
``cores`` and/or ``gpus`` keys which configure the set of core ids and
33+
GPU ids (in RFC 22 idset form) available on the targeted hosts, or a
34+
``properties`` key which is an array of property strings to assign to
35+
``hosts``. It is not an error to list a host multiple times; instead,
36+
each entry updates ``hosts``. If the ``config`` array exists, then any
37+
``path`` is ignored.
38+
39+
Example::
40+
41+
[[resource.config]]
42+
hosts = "test[1-100]"
43+
cores = "0-7"
44+
45+
[[resource.config]]
46+
hosts = "test[1,2]"
47+
gpus = "0-1"
48+
49+
[[resource.config]]
50+
hosts = "test[1-89]"
51+
properties = ["batch"]
52+
53+
[[resource.config]]
54+
hosts = "test[90-100]"
55+
properties = ["debug"]
56+
2757
exclude
2858
(optional) A string value that defines one or more nodes to withhold
2959
from scheduling, either in RFC 22 idset form, or in RFC 29 hostlist form.

src/cmd/flux-R.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "src/common/libutil/read_all.h"
3030
#include "src/common/librlist/rlist.h"
3131
#include "src/common/librlist/rhwloc.h"
32+
#include "src/common/libtomlc99/toml.h"
3233

3334
#define RSET_DOC "\
3435
Read, generate, and process RFC 20 Resource Set objects.\n\
@@ -43,6 +44,7 @@ int cmd_rerank (optparse_t *p, int argc, char **argv);
4344
int cmd_decode (optparse_t *p, int argc, char **argv);
4445
int cmd_verify (optparse_t *p, int argc, char **argv);
4546
int cmd_set_property (optparse_t *p, int argc, char **argv);
47+
int cmd_parse_config (optparse_t *p, int argc, char **argv);
4648

4749
static struct optparse_option global_opts[] = {
4850
OPTPARSE_TABLE_END
@@ -145,6 +147,11 @@ static struct optparse_option decode_opts[] = {
145147
OPTPARSE_TABLE_END
146148
};
147149

150+
/* Option table for the "parse-config" subcommand. It defines no options
 * of its own and holds only the table terminator.
 */
static struct optparse_option parse_config_opts[] = {
    OPTPARSE_TABLE_END
};
153+
154+
148155
static struct optparse_subcommand subcommands[] = {
149156
{ "encode",
150157
"[OPTIONS]...",
@@ -218,6 +225,13 @@ static struct optparse_subcommand subcommands[] = {
218225
0,
219226
set_property_opts,
220227
},
228+
{ "parse-config",
229+
"PATH",
230+
"Read config from resource.config array",
231+
cmd_parse_config,
232+
0,
233+
parse_config_opts,
234+
},
221235
OPTPARSE_SUBCMD_END
222236
};
223237

@@ -828,6 +842,33 @@ int cmd_set_property (optparse_t *p, int argc, char **argv)
828842
return 0;
829843
}
830844

845+
/* Handler for the "parse-config" subcommand (usage: parse-config PATH).
 *
 * Parses the configuration at PATH (argv[1]) with flux_conf_parse(),
 * extracts the `resource.config` value, converts it to an rlist with
 * rlist_from_config(), and prints the result with rlist_puts().
 *
 * Every failure is reported through log_msg_exit() with the error text.
 * Returns 0 on success.
 */
int cmd_parse_config (optparse_t *p, int argc, char **argv)
{
    flux_error_t error;
    json_t *o = NULL;
    struct rlist *rl = NULL;
    flux_conf_t *conf;

    /* PATH is mandatory. Fail with a clear message here rather than
     * handing a missing argument to the config parser.
     */
    if (argc < 2 || !argv[1])
        log_msg_exit ("parse-config: missing required PATH argument");

    if (!(conf = flux_conf_parse (argv[1], &error)))
        log_msg_exit ("flux_conf_parse: %s", error.text);

    /* NOTE(review): 'o' is unpacked with the "o" format, so it is
     * presumably a borrowed reference owned by 'conf' - keep 'conf'
     * alive until 'o' is no longer needed.
     */
    if (flux_conf_unpack (conf, &error,
                          "{s:{s:o}}",
                          "resource",
                            "config", &o) < 0)
        log_msg_exit ("Config file error: %s", error.text);

    if (!(rl = rlist_from_config (o, &error)))
        log_msg_exit ("Config file error: %s", error.text);

    rlist_puts (rl);

    flux_conf_decref (conf);
    rlist_destroy (rl);

    return 0;
}
871+
831872
/*
832873
* vi:tabstop=4 shiftwidth=4 expandtab
833874
*/

src/common/librlist/rlist.c

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2297,5 +2297,211 @@ struct rlist *rlist_from_hwloc (int rank, const char *xml)
22972297
return NULL;
22982298
}
22992299

2300+
/* Check if a resource set provided by configuration is valid.
2301+
* Returns -1 on failure with error in errp->text.
2302+
*/
2303+
static int rlist_config_check (struct rlist *rl, flux_error_t *errp)
2304+
{
2305+
struct rnode *n;
2306+
struct hostlist *empty;
2307+
int rc = -1;
2308+
2309+
if (zlistx_size (rl->nodes) == 0)
2310+
return errprintf (errp, "no hosts configured");
2311+
2312+
if (!(empty = hostlist_create ()))
2313+
return errprintf (errp, "hostlist_create: Out of memory");
2314+
2315+
n = zlistx_first (rl->nodes);
2316+
while (n) {
2317+
if (rnode_avail_total (n) <= 0) {
2318+
if (hostlist_append (empty, n->hostname) < 0) {
2319+
errprintf (errp,
2320+
"host %s was assigned no resources",
2321+
n->hostname);
2322+
goto out;
2323+
}
2324+
}
2325+
n = zlistx_next (rl->nodes);
2326+
}
2327+
if (hostlist_count (empty) > 0) {
2328+
char *s = hostlist_encode (empty);
2329+
errprintf (errp, "resource.config: %s assigned no resources", s);
2330+
free (s);
2331+
goto out;
2332+
}
2333+
rc = 0;
2334+
out:
2335+
hostlist_destroy (empty);
2336+
return rc;
2337+
}
2338+
2339+
/* Process one entry from the resource.config array
2340+
*/
2341+
static int rlist_config_add_entry (struct rlist *rl,
2342+
struct hostlist *hostmap,
2343+
flux_error_t *errp,
2344+
int index,
2345+
const char *hosts,
2346+
const char *cores,
2347+
const char *gpus,
2348+
json_t *properties)
2349+
{
2350+
struct hostlist *hl = NULL;
2351+
const char *host = NULL;
2352+
struct idset *coreids = NULL;
2353+
struct idset *gpuids = NULL;
2354+
struct idset *ranks = NULL;
2355+
int rc = -1;
2356+
2357+
if (!(hl = hostlist_decode (hosts))) {
2358+
errprintf (errp, "config[%d]: invalid hostlist '%s'", index, hosts);
2359+
goto error;
2360+
}
2361+
if (hostlist_count (hl) == 0) {
2362+
errprintf (errp, "config[%d]: empty hostlist specified", index);
2363+
goto error;
2364+
}
2365+
if (!(ranks = idset_create (0, IDSET_FLAG_AUTOGROW))) {
2366+
errprintf (errp, "idset_create: %s", strerror (errno));
2367+
goto error;
2368+
}
2369+
if (cores && !(coreids = idset_decode (cores))) {
2370+
errprintf (errp, "config[%d]: invalid idset cores='%s'", index, cores);
2371+
goto error;
2372+
}
2373+
if (gpus && !(gpuids = idset_decode (gpus))) {
2374+
errprintf (errp, "config[%d]: invalid idset gpus='%s'", index, gpus);
2375+
goto error;
2376+
}
2377+
host = hostlist_first (hl);
2378+
while (host) {
2379+
struct rnode *n;
2380+
int rank = hostlist_find (hostmap, host);
2381+
if (rank < 0) {
2382+
/*
2383+
* First time encountering this host. Append to host map
2384+
* hostlist and assign a rank.
2385+
*/
2386+
if (hostlist_append (hostmap, host) < 0) {
2387+
errprintf (errp, "failed to append %s to host map", host);
2388+
goto error;
2389+
}
2390+
rank = hostlist_count (hostmap) - 1;
2391+
}
2392+
if (idset_set (ranks, rank) < 0) {
2393+
errprintf (errp, "idset_set(ranks, %d): %s",
2394+
rank,
2395+
strerror (errno));
2396+
goto error;
2397+
}
2398+
if (!(n = rnode_new (host, rank))) {
2399+
errprintf (errp, "rnode_new: %s", strerror (errno));
2400+
goto error;
2401+
}
2402+
if (coreids && !rnode_add_child_idset (n, "core", coreids, coreids)) {
2403+
errprintf (errp, "rnode_add_child_idset: %s", strerror (errno));
2404+
goto error;
2405+
}
2406+
if (gpuids && !rnode_add_child_idset (n, "gpu", gpuids, gpuids)) {
2407+
errprintf (errp, "rnode_add_child_idset: %s", strerror (errno));
2408+
goto error;
2409+
}
2410+
if (properties) {
2411+
size_t idx;
2412+
json_t *o;
2413+
2414+
json_array_foreach (properties, idx, o) {
2415+
const char *property;
2416+
if (!(property = json_string_value (o))
2417+
|| property_string_invalid (property)) {
2418+
char *s = json_dumps (o, JSON_ENCODE_ANY);
2419+
errprintf (errp,
2420+
"config[%d]: invalid property \"%s\"",
2421+
index,
2422+
s);
2423+
free (s);
2424+
goto error;
2425+
}
2426+
if (rnode_set_property (n, property) < 0) {
2427+
errprintf (errp,
2428+
"Failed to set property %s on rank %u",
2429+
property,
2430+
rank);
2431+
goto error;
2432+
}
2433+
}
2434+
}
2435+
if (rlist_add_rnode (rl, n) < 0) {
2436+
errprintf (errp, "Unable to add rnode: %s", strerror (errno));
2437+
goto error;
2438+
}
2439+
host = hostlist_next (hl);
2440+
}
2441+
rc = 0;
2442+
error:
2443+
hostlist_destroy (hl);
2444+
idset_destroy (ranks);
2445+
idset_destroy (coreids);
2446+
idset_destroy (gpuids);
2447+
return rc;
2448+
}
2449+
2450+
/* Build a resource list from a `resource.config` JSON array.
 *
 * Each array element must be an object with a required string "hosts"
 * key and optional "cores" and "gpus" strings and a "properties" array;
 * any other key is rejected. Entries are applied in order with
 * rlist_config_add_entry(), and the finished list is validated by
 * rlist_config_check().
 *
 * Returns a new rlist on success (the caller releases it with
 * rlist_destroy()), or NULL on failure with error in errp->text.
 */
struct rlist *rlist_from_config (json_t *conf, flux_error_t *errp)
{
    size_t index;
    json_t *entry;
    struct rlist *rl = NULL;
    struct hostlist *hl = NULL;

    if (!conf || !json_is_array (conf)) {
        errprintf (errp, "resource config must be an array");
        return NULL;
    }

    /* 'hl' maps hostnames to ranks across all entries, so that a host
     * listed in several entries keeps the rank it was first assigned.
     */
    if (!(hl = hostlist_create ())
        || !(rl = rlist_create ())) {
        errprintf (errp, "Out of memory");
        goto error;
    }

    json_array_foreach (conf, index, entry) {
        const char *hosts = NULL;
        const char *cores = NULL;
        const char *gpus = NULL;
        json_t *properties = NULL;
        json_error_t error;

        if (json_unpack_ex (entry, &error, 0,
                            "{s:s s?s s?s s?o !}",
                            "hosts", &hosts,
                            "cores", &cores,
                            "gpus", &gpus,
                            "properties", &properties) < 0) {
            errprintf (errp, "config[%zu]: %s", index, error.text);
            goto error;
        }
        /* The "o" format accepts any JSON type. Reject non-arrays here,
         * since iterating a non-array visits nothing and the setting
         * would otherwise be silently ignored.
         */
        if (properties && !json_is_array (properties)) {
            errprintf (errp,
                       "config[%zu]: properties must be an array",
                       index);
            goto error;
        }
        if (rlist_config_add_entry (rl,
                                    hl,
                                    errp,
                                    (int) index,
                                    hosts,
                                    cores,
                                    gpus,
                                    properties) < 0)
            goto error;
    }

    if (rlist_config_check (rl, errp) < 0)
        goto error;

    hostlist_destroy (hl);
    return rl;
error:
    hostlist_destroy (hl);
    rlist_destroy (rl);
    return NULL;
}
2505+
23002506
/* vi: ts=4 sw=4 expandtab
23012507
*/

src/common/librlist/rlist.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,4 +280,6 @@ int rlist_assign_properties (struct rlist *rl,
280280
*/
281281
char *rlist_properties_encode (struct rlist *rl);
282282

283+
struct rlist *rlist_from_config (json_t *conf, flux_error_t *errp);
284+
283285
#endif /* !HAVE_SCHED_RLIST_H */

src/common/librlist/rnode.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,10 @@ static int rnode_child_add_idset (struct rnode_child *c,
125125
* then add 'ids' to that child (it is an error if one or more ids
126126
* are already set in child 'name'.
127127
*/
128-
static struct rnode_child * rnode_add_child_idset (struct rnode *n,
129-
const char *name,
130-
const struct idset *ids,
131-
const struct idset *avail)
128+
struct rnode_child * rnode_add_child_idset (struct rnode *n,
129+
const char *name,
130+
const struct idset *ids,
131+
const struct idset *avail)
132132
{
133133
struct rnode_child *c = zhashx_lookup (n->children, name);
134134

@@ -187,6 +187,7 @@ struct rnode *rnode_new (const char *name, uint32_t rank)
187187

188188
int rnode_add (struct rnode *orig, struct rnode *n)
189189
{
190+
int rc = 0;
190191
struct rnode_child *c;
191192
if (!orig || !n)
192193
return -1;
@@ -197,7 +198,21 @@ int rnode_add (struct rnode *orig, struct rnode *n)
197198
return -1;
198199
c = zhashx_next (n->children);
199200
}
200-
return 0;
201+
if (n->properties) {
202+
zlistx_t *l = zhashx_keys (n->properties);
203+
if (l != NULL) {
204+
const char *property = zlistx_first (l);
205+
while (property) {
206+
if (rnode_set_property (orig, property) < 0)
207+
rc = -1;
208+
property = zlistx_next (l);
209+
}
210+
zlistx_destroy (&l);
211+
}
212+
else
213+
rc = -1;
214+
}
215+
return rc;
201216
}
202217

203218
struct rnode *rnode_create (const char *name, uint32_t rank, const char *ids)

src/common/librlist/rnode.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ struct rnode {
4141
zhashx_t *properties;
4242
};
4343

44+
struct rnode *rnode_new (const char *name, uint32_t rank);
45+
46+
struct rnode_child * rnode_add_child_idset (struct rnode *n,
47+
const char *name,
48+
const struct idset *ids,
49+
const struct idset *avail);
50+
4451
/* Create a resource node object from an existing idset `set`
4552
*/
4653
struct rnode *rnode_create_idset (const char *name,

0 commit comments

Comments
 (0)