Skip to content

Commit e41df2a

Browse files
committed
job-manager: add plugin to assist posting manual job events
Problem: Sometimes a job becomes stuck in a given state, requiring intervention by forcing an event to be posted to the job manager. Currently, this requires building a custom jobtap plugin, which is annoying and not sysadmin friendly. Add a builtin jobtap plugin that will simply post a custom job event defined in the RPC.
1 parent 6e82f81 commit e41df2a

File tree

3 files changed

+63
-1
lines changed

3 files changed

+63
-1
lines changed

src/modules/job-manager/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ libjob_manager_la_SOURCES = \
7474
plugins/begin-time.c \
7575
plugins/update-duration.c \
7676
plugins/validate-duration.c \
77-
plugins/history.c
77+
plugins/history.c \
78+
plugins/post-event.c
7879

7980
fluxinclude_HEADERS = \
8081
jobtap.h

src/modules/job-manager/jobtap.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ extern int begin_time_plugin_init (flux_plugin_t *p);
5151
extern int validate_duration_plugin_init (flux_plugin_t *p);
5252
extern int update_duration_plugin_init (flux_plugin_t *p);
5353
extern int history_plugin_init (flux_plugin_t *p);
54+
extern int post_event_init (flux_plugin_t *p);
5455

5556
struct jobtap_builtin {
5657
const char *name;
@@ -72,6 +73,7 @@ static struct jobtap_builtin jobtap_builtins [] = {
7273
{ ".validate-duration", &validate_duration_plugin_init },
7374
{ ".update-duration", &update_duration_plugin_init },
7475
{ ".history", &history_plugin_init },
76+
{ ".post-event", &post_event_init },
7577
{ 0 },
7678
};
7779

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/************************************************************\
2+
* Copyright 2024 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
/* post-event.c - post manual events to job eventlog
12+
*/
13+
14+
#if HAVE_CONFIG_H
15+
#include "config.h"
16+
#endif
17+
18+
#include <jansson.h>
19+
#include <flux/core.h>
20+
#include <flux/jobtap.h>
21+
22+
static void post_event_cb (flux_t *h,
23+
flux_msg_handler_t *mh,
24+
const flux_msg_t *msg,
25+
void *arg)
26+
{
27+
flux_plugin_t *p = arg;
28+
flux_jobid_t id;
29+
const char *name;
30+
json_t *context = NULL;
31+
32+
if (flux_msg_unpack (msg,
33+
"{s:I s:s s?o}",
34+
"id", &id,
35+
"name", &name,
36+
"context", &context) < 0)
37+
goto error;
38+
if (context) {
39+
if (flux_jobtap_event_post_pack (p, id, name, "O", context) < 0)
40+
goto error;
41+
}
42+
else if (flux_jobtap_event_post_pack (p, id, name, NULL) < 0)
43+
goto error;
44+
if (flux_respond (h, msg, NULL) < 0)
45+
flux_log_error (h, "error responding to job-manager.post-event");
46+
error:
47+
if (flux_respond_error (h, msg, errno, NULL) < 0)
48+
flux_log_error (h, "error responding to job-manager.post-event");
49+
}
50+
51+
52+
/* Initialization entry point for the builtin post-event jobtap plugin.
 *
 * Registers a service method named "post" on plugin p, with post_event_cb
 * as the handler and p itself as the handler argument.
 *
 * Returns 0 on success, -1 if the service could not be registered.
 */
int post_event_init (flux_plugin_t *p)
{
    int rc = flux_jobtap_service_register_ex (p, "post", 0, post_event_cb, p);

    return rc < 0 ? -1 : 0;
}
58+
59+
// vi:ts=4 sw=4 expandtab

0 commit comments

Comments
 (0)