Skip to content

Commit 771d741

Browse files
committed
resource: support resource.reserve configuration key
Problem: There is no way to reserve a set of cores so they cannot be used by the scheduler of a Flux instance. Add support for a new config key `resource.reserve`, which takes a string of the form `cores[@ranks]`, where `cores` and the optional `ranks` are idsets specifying the cores to reserve and the ranks on which to reserve them. If `ranks` is not specified, the spec applies to all ranks. The reserved set of cores is subtracted from the resource set before it is handed off to the scheduler. The reserved resource set is also removed from the status response used by `flux resource list`.
1 parent 226301c commit 771d741

File tree

4 files changed

+32
-5
lines changed

4 files changed

+32
-5
lines changed

src/modules/resource/acquire.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
#include "reslog.h"
6666
#include "inventory.h"
6767
#include "exclude.h"
68+
#include "reserve.h"
6869
#include "drain.h"
6970
#include "acquire.h"
7071
#include "monitor.h"
@@ -108,6 +109,7 @@ static int acquire_request_init (struct acquire_request *ar,
108109
{
109110
struct resource_ctx *ctx = acquire->ctx;
110111
const struct idset *exclude = exclude_get (ctx->exclude);
112+
const struct rlist *reserved = reserve_get (ctx->reserve);
111113
json_error_t e;
112114
struct rlist *rl;
113115
struct idset *drain = NULL;
@@ -116,8 +118,11 @@ static int acquire_request_init (struct acquire_request *ar,
116118
errno = EINVAL;
117119
return -1;
118120
}
119-
if (exclude && idset_count (exclude) > 0) {
120-
(void)rlist_remove_ranks (rl, (struct idset *)exclude);
121+
if (reserved || (exclude && idset_count (exclude) > 0)) {
122+
if (exclude)
123+
(void)rlist_remove_ranks (rl, (struct idset *)exclude);
124+
if (reserved)
125+
(void)rlist_subtract (rl, reserved);
121126
if (!(ar->resources = rlist_to_R (rl))) {
122127
errno = ENOMEM;
123128
goto error;

src/modules/resource/resource.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,18 @@
3636
#include "rutil.h"
3737
#include "status.h"
3838
#include "upgrade.h"
39+
#include "reserve.h"
3940

4041
/* Parse [resource] table.
4142
*
4243
* exclude = "targets"
4344
* Exclude specified broker rank(s) or hosts from scheduling
4445
*
46+
* reserve = "corespec"
47+
* Exclude specified cores from scheduling; corespec has the form:
48+
* "cores[@ranks]", where cores and ranks are each non-empty
49+
* idsets.
50+
*
4551
* [[resource.config]]
4652
* Resource configuration array
4753
*
@@ -71,6 +77,7 @@ static int parse_config (struct resource_ctx *ctx,
7177
const char *exclude = NULL;
7278
const char *path = NULL;
7379
const char *scheduling_path = NULL;
80+
const char *reserve = NULL;
7481
int noverify = 0;
7582
int norestrict = 0;
7683
int no_update_watch = 0;
@@ -80,12 +87,13 @@ static int parse_config (struct resource_ctx *ctx,
8087

8188
if (flux_conf_unpack (conf,
8289
&error,
83-
"{s?{s?s s?s s?o s?s s?b s?b s?b s?b !}}",
90+
"{s?{s?s s?s s?o s?s s?s s?b s?b s?b s?b !}}",
8491
"resource",
8592
"path", &path,
8693
"scheduling", &scheduling_path,
8794
"config", &config,
8895
"exclude", &exclude,
96+
"reserve", &reserve,
8997
"norestrict", &norestrict,
9098
"noverify", &noverify,
9199
"no-update-watch", &no_update_watch,
@@ -147,6 +155,7 @@ static int parse_config (struct resource_ctx *ctx,
147155
}
148156
if (rconfig) {
149157
rconfig->exclude_idset = exclude;
158+
rconfig->reserve = reserve;
150159
rconfig->noverify = noverify ? true : false;
151160
rconfig->norestrict = norestrict ? true : false;
152161
rconfig->no_update_watch = no_update_watch ? true : false;
@@ -238,6 +247,7 @@ static void resource_ctx_destroy (struct resource_ctx *ctx)
238247
topo_destroy (ctx->topology);
239248
monitor_destroy (ctx->monitor);
240249
exclude_destroy (ctx->exclude);
250+
reserve_destroy (ctx->reserve);
241251
reslog_destroy (ctx->reslog);
242252
inventory_destroy (ctx->inventory);
243253
flux_msg_handler_delvec (ctx->handlers);
@@ -383,6 +393,8 @@ int mod_main (flux_t *h, int argc, char **argv)
383393
goto error;
384394
if (!(ctx->drain = drain_create (ctx, eventlog)))
385395
goto error;
396+
if (!(ctx->reserve = reserve_create (ctx, config.reserve)))
397+
goto error;
386398
}
387399
/* topology is initialized after exclude/drain etc since this
388400
* rank may attempt to drain itself due to a topology mismatch.

src/modules/resource/resource.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
struct resource_config {
1515
json_t *R;
1616
const char *exclude_idset;
17+
const char *reserve;
1718
bool rediscover;
1819
bool noverify;
1920
bool norestrict;
@@ -29,6 +30,7 @@ struct resource_ctx {
2930
struct topo *topology;
3031
struct drain *drain;
3132
struct exclude *exclude;
33+
struct reserve *reserve;
3234
struct acquire *acquire;
3335
struct reslog *reslog;
3436
struct status *status;

src/modules/resource/status.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "rutil.h"
2222
#include "monitor.h"
2323
#include "exclude.h"
24+
#include "reserve.h"
2425
#include "status.h"
2526
#include "reslog.h"
2627

@@ -282,9 +283,11 @@ static json_t *update_properties_json (json_t *R, const struct rlist *all)
282283

283284
/* Create an rlist object from R. Omit the scheduling key. Then:
284285
* - exclude the ranks in 'exclude' (if non-NULL)
286+
* - exclude resources in 'reserved' (if non-NULL)
285287
*/
286288
static struct rlist *create_rlist (const json_t *R,
287-
const struct idset *exclude)
289+
const struct idset *exclude,
290+
const struct rlist *reserved)
288291
{
289292
json_t *cpy;
290293
struct rlist *rl;
@@ -302,6 +305,10 @@ static struct rlist *create_rlist (const json_t *R,
302305
if (rlist_remove_ranks (rl, (struct idset *)exclude) < 0)
303306
goto error;
304307
}
308+
if (reserved) {
309+
if (rlist_subtract (rl, reserved) < 0)
310+
goto error;
311+
}
305312
json_decref (cpy);
306313
return rl;
307314
error:
@@ -316,9 +323,10 @@ static struct rlist *get_resource_list (struct status *status)
316323
if (!status->cache.rl) {
317324
const json_t *R;
318325
const struct idset *exclude = exclude_get (status->ctx->exclude);
326+
const struct rlist *reserved = reserve_get (status->ctx->reserve);
319327

320328
if ((R = inventory_get (status->ctx->inventory)))
321-
status->cache.rl = create_rlist (R, exclude);
329+
status->cache.rl = create_rlist (R, exclude, reserved);
322330
}
323331
return status->cache.rl;
324332
}

0 commit comments

Comments
 (0)