Skip to content

Commit e0f2211

Browse files
authored
Merge pull request #5304 from garlick/alloc_check
job-manager: add alloc-check plugin
2 parents 49d65d0 + 4ec4d4d commit e0f2211

File tree

4 files changed

+305
-0
lines changed

4 files changed

+305
-0
lines changed

src/modules/job-manager/Makefile.am

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ noinst_LTLIBRARIES = \
1818
jobtap_plugin_LTLIBRARIES = \
1919
plugins/submit-hold.la \
2020
plugins/alloc-bypass.la \
21+
plugins/alloc-check.la \
2122
plugins/perilog.la
2223

2324
libjob_manager_la_SOURCES = \
@@ -90,6 +91,15 @@ plugins_alloc_bypass_la_LDFLAGS = \
9091
$(fluxplugin_ldflags) \
9192
-module
9293

94+
plugins_alloc_check_la_SOURCES = \
95+
plugins/alloc-check.c
96+
plugins_alloc_check_la_LIBADD = \
97+
$(top_builddir)/src/common/libflux-internal.la \
98+
$(top_builddir)/src/common/librlist/librlist.la
99+
plugins_alloc_check_la_LDFLAGS = \
100+
$(fluxplugin_ldflags) \
101+
-module
102+
93103
plugins_perilog_la_SOURCES = \
94104
plugins/perilog.c
95105
plugins_perilog_la_LIBADD = \
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
/************************************************************\
2+
* Copyright 2023 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
/* alloc-check.c - plugin to ensure resources are never double booked
12+
*
13+
* A fatal exception is raised on jobs that are granted resources already
14+
* granted to another.
15+
*
16+
* In order to be sure that the exception can be raised before a short job
17+
* becomes inactive, R is looked up in the KVS synchronously, causing the
18+
* job manager to be briefly unresponsive. Hence, this plugin is primarily
19+
* suited for debug/test situations.
20+
*
21+
* N.B. This plugin does not account for any jobs that might already have
22+
* allocations when the plugin is loaded.
23+
*/
24+
25+
#if HAVE_CONFIG_H
26+
#include "config.h"
27+
#endif
28+
29+
#include <jansson.h>
30+
#include <flux/core.h>
31+
#include <flux/jobtap.h>
32+
33+
#include "ccan/str/str.h"
34+
#include "src/common/librlist/rlist.h"
35+
#include "src/common/libjob/idf58.h"
36+
37+
#define PLUGIN_NAME "alloc-check"
38+
static const char *auxname = PLUGIN_NAME "::resdb";
39+
40+
/* Start out with empty resource set. Add resources on job.event.alloc
41+
* (scheduler has allocated resources to job). Subtract resources on
42+
* job.event.free (job manager has returned resources to the scheduler).
43+
*/
44+
struct resdb {
45+
struct rlist *allocated;
46+
};
47+
48+
static void resdb_destroy (struct resdb *resdb)
49+
{
50+
if (resdb) {
51+
int saved_errno = errno;
52+
rlist_destroy (resdb->allocated);
53+
free (resdb);
54+
errno = saved_errno;
55+
}
56+
}
57+
58+
static struct resdb *resdb_create (void)
59+
{
60+
struct resdb *resdb;
61+
62+
if (!(resdb = calloc (1, sizeof (*resdb))))
63+
return NULL;
64+
if (!(resdb->allocated = rlist_create())) {
65+
free (resdb);
66+
errno = ENOMEM;
67+
return NULL;
68+
}
69+
return resdb;
70+
}
71+
72+
/* Generate the kvs path to R for a given job
73+
*/
74+
static int res_makekey (flux_jobid_t id, char *buf, size_t size)
75+
{
76+
char dir[128];
77+
if (flux_job_id_encode (id, "kvs", dir, sizeof (dir)) < 0)
78+
return -1;
79+
if (snprintf (buf, size, "%s.R", dir) >= size) {
80+
errno = EOVERFLOW;
81+
return -1;
82+
}
83+
return 0;
84+
}
85+
86+
/* Synchronously look up R for a given job and convert it to an rlist object
87+
* which the caller must destroy with rlist_destroy().
88+
*/
89+
static struct rlist *res_lookup (flux_t *h, flux_jobid_t id)
90+
{
91+
char key[128];
92+
flux_future_t *f = NULL;
93+
const char *R;
94+
struct rlist *rlist;
95+
96+
if (res_makekey (id, key, sizeof (key)) < 0
97+
|| !(f = flux_kvs_lookup (h, NULL, 0, key))
98+
|| flux_kvs_lookup_get (f, &R) < 0
99+
|| !(rlist = rlist_from_R (R))) {
100+
flux_future_destroy (f);
101+
return NULL;
102+
}
103+
flux_future_destroy (f);
104+
return rlist;
105+
}
106+
107+
static int jobtap_cb (flux_plugin_t *p,
108+
const char *topic,
109+
flux_plugin_arg_t *args,
110+
void *arg)
111+
{
112+
struct resdb *resdb = flux_plugin_aux_get (p, auxname);
113+
flux_t *h = flux_jobtap_get_flux (p);
114+
flux_jobid_t id;
115+
116+
if (flux_plugin_arg_unpack (args,
117+
FLUX_PLUGIN_ARG_IN,
118+
"{s:I}",
119+
"id", &id) < 0) {
120+
flux_log (h,
121+
LOG_ERR,
122+
"%s %s: unpack: %s",
123+
PLUGIN_NAME,
124+
topic,
125+
flux_plugin_arg_strerror (args));
126+
return -1;
127+
}
128+
/* job.event.* callbacks are not received unless subscribed on a per-job
129+
* basis, so subscribe to them in the job.new callback.
130+
*/
131+
if (streq (topic, "job.new")) {
132+
if (flux_jobtap_job_subscribe (p, id) < 0) {
133+
flux_log_error (h,
134+
"%s(%s) %s: subscribe",
135+
PLUGIN_NAME,
136+
idf58 (id),
137+
topic);
138+
}
139+
}
140+
/* Look up R that was just allocated to the job and attach it to the job
141+
* aux container so we don't have to look it up again on free. Call
142+
* rlist_append() to add the resources to resdb->allocated. If that
143+
* fails, some resources are already allocated so raise a fatal exception
144+
* on the job.
145+
*/
146+
else if (streq (topic, "job.event.alloc")) {
147+
struct rlist *R;
148+
if (!(R = res_lookup (h, id))
149+
|| flux_jobtap_job_aux_set (p,
150+
id,
151+
PLUGIN_NAME "::R",
152+
R,
153+
(flux_free_f)rlist_destroy) < 0) {
154+
flux_log_error (h,
155+
"%s(%s) %s: failed to lookup or cache R",
156+
PLUGIN_NAME,
157+
idf58 (id),
158+
topic);
159+
rlist_destroy (R);
160+
return -1;
161+
}
162+
if (rlist_append (resdb->allocated, R) < 0) {
163+
flux_jobtap_raise_exception (p,
164+
id,
165+
"alloc-check",
166+
0,
167+
"resources already allocated");
168+
}
169+
}
170+
/* Get R that was just freed from the job's aux container and remove it
171+
* from resdb->allocated. Any jobs that had allocations before the module
172+
* will not have the R aux item, so silently return success in that case.
173+
*/
174+
else if (streq (topic, "job.event.free")) {
175+
struct rlist *R = flux_jobtap_job_aux_get (p, id, PLUGIN_NAME "::R");
176+
if (R) {
177+
struct rlist *diff;
178+
if (!(diff = rlist_diff (resdb->allocated, R))) {
179+
flux_log_error (h,
180+
"%s(%s) %s: rlist_diff",
181+
PLUGIN_NAME,
182+
idf58 (id),
183+
topic);
184+
return -1;
185+
}
186+
rlist_destroy (resdb->allocated);
187+
resdb->allocated = diff;
188+
}
189+
}
190+
return 0;
191+
}
192+
193+
static const struct flux_plugin_handler tab[] = {
194+
{ "job.event.alloc", jobtap_cb, NULL },
195+
{ "job.event.free", jobtap_cb, NULL },
196+
{ "job.new", jobtap_cb, NULL },
197+
{ 0 }
198+
};
199+
200+
int flux_plugin_init (flux_plugin_t *p)
201+
{
202+
struct resdb *resdb;
203+
204+
if (!(resdb = resdb_create ())
205+
|| flux_plugin_aux_set (p,
206+
auxname,
207+
resdb,
208+
(flux_free_f)resdb_destroy) < 0) {
209+
resdb_destroy (resdb);
210+
return -1;
211+
}
212+
return flux_plugin_register (p, "alloc-check", tab);
213+
}
214+
215+
// vi:ts=4 sw=4 expandtab

t/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ TESTSCRIPTS = \
159159
t2300-sched-simple.t \
160160
t2302-sched-simple-up-down.t \
161161
t2303-sched-hello.t \
162+
t2304-sched-simple-alloc-check.t \
162163
t2310-resource-module.t \
163164
t2311-resource-drain.t \
164165
t2312-resource-exclude.t \

t/t2304-sched-simple-alloc-check.t

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/bin/sh
2+
3+
test_description='check that sched-simple never double books resources'
4+
5+
# Append --logfile option if FLUX_TESTS_LOGFILE is set in environment:
6+
test -n "$FLUX_TESTS_LOGFILE" && set -- "$@" --logfile
7+
. $(dirname $0)/sharness.sh
8+
9+
test_under_flux 1
10+
11+
# Verify that alloc-check plugin works using alloc-bypass
12+
test_expect_success 'load alloc-bypass and alloc-check plugins' '
13+
flux jobtap load alloc-bypass.so &&
14+
flux jobtap load alloc-check.so
15+
'
16+
test_expect_success 'run an alloc-bypass sleep job' '
17+
flux submit \
18+
-vvv \
19+
--wait-event=start \
20+
--setattr=alloc-bypass.R="$(flux R encode -r 0)" \
21+
-n 1 \
22+
sleep inf
23+
'
24+
test_expect_success 'a regular job fails with an alloc-check exception' '
25+
run_timeout 30 flux submit --flags=waitable -vvv \
26+
--wait-event=exception \
27+
-N1 /bin/true >bypass.jobid
28+
'
29+
test_expect_success 'flux job wait says the job failed' '
30+
test_must_fail flux job wait -v $(cat bypass.jobid)
31+
'
32+
test_expect_success 'clean up jobs' '
33+
flux cancel --all &&
34+
flux queue drain
35+
'
36+
test_expect_success 'unload plugins' '
37+
flux jobtap remove alloc-check.so &&
38+
flux jobtap remove alloc-bypass.so
39+
'
40+
41+
# Check that sched-simple doesn't suffer from time limit issue like
42+
# flux-framework/flux-sched#1043
43+
#
44+
test_expect_success 'configure epilog with 2s delay' '
45+
flux config load <<-EOT
46+
[job-manager]
47+
plugins = [
48+
{ load = "perilog.so" },
49+
]
50+
epilog.command = [ "flux", "perilog-run", "epilog", "-e", "sleep,2" ]
51+
EOT
52+
'
53+
test_expect_success 'load alloc-check plugin' '
54+
flux jobtap load alloc-check.so
55+
'
56+
test_expect_success 'submit consecutive jobs that exceed their time limit' '
57+
(for i in $(seq 5); \
58+
do flux submit -N1 -x -t1s sleep inf; \
59+
done) >jobids
60+
'
61+
test_expect_success 'wait for jobs to complete and capture their stderr' '
62+
(for id in $(cat jobids); do \
63+
flux job attach $id || true; \
64+
done) 2>joberr
65+
'
66+
test_expect_success 'some jobs received timeout exception' '
67+
grep "job.exception type=timeout" joberr
68+
'
69+
test_expect_success 'no jobs received alloc-check exception' '
70+
test_must_fail grep "job.exception type=alloc-check" joberr
71+
'
72+
test_expect_success 'remove alloc-check plugin' '
73+
flux jobtap remove alloc-check.so
74+
'
75+
test_expect_success 'undo epilog config' '
76+
flux config load </dev/null
77+
'
78+
79+
test_done

0 commit comments

Comments
 (0)