Skip to content

Commit 6a4047f

Browse files
committed
job-list: rebuild and store eventlog
Problem: In the near future we will need access to the job's eventlog when a job goes inactive. Solution: Rebuild the job eventlog from the events journal and store it internally in struct job.
1 parent 3d33f37 commit 6a4047f

File tree

2 files changed

+53
-0
lines changed

2 files changed

+53
-0
lines changed

src/modules/job-list/job_state.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ static void job_destroy (void *data)
124124
grudgeset_destroy (job->dependencies);
125125
json_decref (job->jobspec);
126126
json_decref (job->R);
127+
free (job->eventlog);
127128
json_decref (job->exception_context);
128129
zlist_destroy (&job->next_states);
129130
free (job);
@@ -899,6 +900,42 @@ void job_state_unpause_cb (flux_t *h, flux_msg_handler_t *mh,
899900
flux_log_error (h, "error responding to unpause request");
900901
}
901902

903+
static int store_eventlog_entry (struct list_ctx *ctx,
904+
struct job *job,
905+
json_t *entry)
906+
{
907+
char *s = json_dumps (entry, 0);
908+
int rv = -1;
909+
910+
/* entry should have been verified via eventlog_entry_parse()
911+
* earlier */
912+
assert (s);
913+
914+
if (!job->eventlog) {
915+
job->eventlog_len = strlen (s) + 2; /* +2 for \n and \0 */
916+
if (!(job->eventlog = calloc (1, job->eventlog_len))) {
917+
flux_log_error (ctx->h, "calloc");
918+
goto error;
919+
920+
}
921+
strcpy (job->eventlog, s);
922+
strcat (job->eventlog, "\n");
923+
}
924+
else {
925+
job->eventlog_len += strlen (s) + 1; /* +1 for \n */
926+
if (!(job->eventlog = realloc (job->eventlog, job->eventlog_len))) {
927+
flux_log_error (ctx->h, "realloc");
928+
goto error;
929+
}
930+
strcat (job->eventlog, s);
931+
strcat (job->eventlog, "\n");
932+
}
933+
rv = 0;
934+
error:
935+
free (s);
936+
return rv;
937+
}
938+
902939
static struct job *eventlog_restart_parse (struct list_ctx *ctx,
903940
const char *eventlog,
904941
flux_jobid_t id)
@@ -928,6 +965,9 @@ static struct job *eventlog_restart_parse (struct list_ctx *ctx,
928965
goto error;
929966
}
930967

968+
if (store_eventlog_entry (ctx, job, value) < 0)
969+
goto error;
970+
931971
job->eventlog_seq++;
932972
if (!strcmp (name, "submit")) {
933973
if (submit_context_parse (ctx->h, job, context) < 0)
@@ -1257,13 +1297,18 @@ static int journal_submit_event (struct job_state_ctx *jsctx,
12571297
flux_jobid_t id,
12581298
int eventlog_seq,
12591299
double timestamp,
1300+
json_t *entry,
12601301
json_t *context)
12611302
{
12621303
if (!job) {
12631304
if (!(job = job_create (jsctx->ctx, id))){
12641305
flux_log_error (jsctx->h, "%s: job_create", __FUNCTION__);
12651306
return -1;
12661307
}
1308+
if (store_eventlog_entry (jsctx->ctx, job, entry) < 0) {
1309+
job_destroy (job);
1310+
return -1;
1311+
}
12671312
if (zhashx_insert (jsctx->index, &job->id, job) < 0) {
12681313
flux_log_error (jsctx->h, "%s: zhashx_insert", __FUNCTION__);
12691314
job_destroy (job);
@@ -1621,12 +1666,18 @@ static int journal_process_event (struct job_state_ctx *jsctx, json_t *event)
16211666
return 0;
16221667
}
16231668

1669+
if (job && job->eventlog) {
1670+
if (store_eventlog_entry (jsctx->ctx, job, entry) < 0)
1671+
return -1;
1672+
}
1673+
16241674
if (!strcmp (name, "submit")) {
16251675
if (journal_submit_event (jsctx,
16261676
job,
16271677
id,
16281678
eventlog_seq,
16291679
timestamp,
1680+
entry,
16301681
context) < 0)
16311682
return -1;
16321683
}

src/modules/job-list/job_state.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ struct job {
103103
/* cache of job information */
104104
json_t *jobspec;
105105
json_t *R;
106+
char *eventlog;
107+
size_t eventlog_len;
106108
json_t *exception_context;
107109

108110
/* Track which states we have seen and have completed transition

0 commit comments

Comments
 (0)