Skip to content

Commit f34751b

Browse files
committed
job-manager: use f58 encoding for jobids in log messages
Problem: The job manager logs jobids in their decimal representation, which isn't easily identifiable as a jobid at first glance and takes up more space in most cases. Log jobids as f58 where possible in the job manager using idf58(). Update a couple of tests in the testsuite that expect decimal jobids in dmesg output.
1 parent 0d96998 commit f34751b

File tree

15 files changed

+126
-107
lines changed

15 files changed

+126
-107
lines changed

src/modules/job-manager/alloc.c

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <assert.h>
2626

2727
#include "src/common/libczmqcontainers/czmq_containers.h"
28+
#include "src/common/libjob/idf58.h"
2829
#include "ccan/str/str.h"
2930

3031
#include "job.h"
@@ -130,14 +131,14 @@ static void free_response_cb (flux_t *h, flux_msg_handler_t *mh,
130131
if (flux_msg_unpack (msg, "{s:I}", "id", &id) < 0)
131132
goto teardown;
132133
if (!(job = zhashx_lookup (ctx->active_jobs, &id))) {
133-
flux_log (h, LOG_ERR, "sched.free-response: id=%ju not active",
134-
(uintmax_t)id);
134+
flux_log (h, LOG_ERR, "sched.free-response: id=%s not active",
135+
idf58 (id));
135136
errno = EINVAL;
136137
goto teardown;
137138
}
138139
if (!job->has_resources) {
139-
flux_log (h, LOG_ERR, "sched.free-response: id=%ju not allocated",
140-
(uintmax_t)id);
140+
flux_log (h, LOG_ERR, "sched.free-response: %s not allocated",
141+
idf58 (id));
141142
errno = EINVAL;
142143
goto teardown;
143144
}
@@ -184,7 +185,7 @@ int cancel_request (struct alloc *alloc, struct job *job)
184185
"{s:I}",
185186
"id",
186187
job->id))) {
187-
flux_log_error (h, "sending sched.cancel id=%ju", (uintmax_t)job->id);
188+
flux_log_error (h, "sending sched.cancel id=%s", idf58 (job->id));
188189
return -1;
189190
}
190191
flux_future_destroy (f);
@@ -215,14 +216,14 @@ static void alloc_response_cb (flux_t *h, flux_msg_handler_t *mh,
215216
"annotations", &annotations) < 0)
216217
goto teardown;
217218
if (!(job = zhashx_lookup (ctx->active_jobs, &id))) {
218-
flux_log (h, LOG_ERR, "sched.alloc-response: id=%ju not active",
219-
(uintmax_t)id);
219+
flux_log (h, LOG_ERR, "sched.alloc-response: id=%s not active",
220+
idf58 (id));
220221
errno = EINVAL;
221222
goto teardown;
222223
}
223224
if (!job->alloc_pending) {
224-
flux_log (h, LOG_ERR, "sched.alloc-response: id=%ju not requested",
225-
(uintmax_t)id);
225+
flux_log (h, LOG_ERR, "sched.alloc-response: id=%s not requested",
226+
idf58 (id));
226227
errno = EINVAL;
227228
goto teardown;
228229
}
@@ -236,13 +237,13 @@ static void alloc_response_cb (flux_t *h, flux_msg_handler_t *mh,
236237
if (job->has_resources) {
237238
flux_log (h,
238239
LOG_ERR,
239-
"sched.alloc-response: id=%ju already allocated",
240-
(uintmax_t)id);
240+
"sched.alloc-response: id=%s already allocated",
241+
idf58 (id));
241242
errno = EEXIST;
242243
goto teardown;
243244
}
244245
if (annotations_update_and_publish (ctx, job, annotations) < 0)
245-
flux_log_error (h, "annotations_update: id=%ju", (uintmax_t)id);
246+
flux_log_error (h, "annotations_update: id=%s", idf58 (id));
246247

247248
/* Only modify job state after annotation event is published
248249
*/
@@ -265,7 +266,7 @@ static void alloc_response_cb (flux_t *h, flux_msg_handler_t *mh,
265266
goto teardown;
266267
}
267268
if (annotations_update_and_publish (ctx, job, annotations) < 0)
268-
flux_log_error (h, "annotations_update: id=%ju", (uintmax_t)id);
269+
flux_log_error (h, "annotations_update: id=%s", idf58 (id));
269270
break;
270271
case FLUX_SCHED_ALLOC_DENY: // error
271272
alloc->alloc_pending_count--;
@@ -281,8 +282,9 @@ static void alloc_response_cb (flux_t *h, flux_msg_handler_t *mh,
281282
EVENT_NO_COMMIT,
282283
"{s:n}", "annotations") < 0)
283284
flux_log_error (ctx->h,
284-
"%s: event_job_post_pack: id=%ju",
285-
__FUNCTION__, (uintmax_t)id);
285+
"%s: event_job_post_pack: id=%s",
286+
__FUNCTION__,
287+
idf58 (id));
286288
}
287289
if (event_job_post_pack (ctx->event, job, "exception", 0,
288290
"{ s:s s:i s:I s:s }",
@@ -310,14 +312,15 @@ static void alloc_response_cb (flux_t *h, flux_msg_handler_t *mh,
310312
EVENT_NO_COMMIT,
311313
"{s:n}", "annotations") < 0)
312314
flux_log_error (ctx->h,
313-
"%s: event_job_post_pack: id=%ju",
314-
__FUNCTION__, (uintmax_t)id);
315+
"%s: event_job_post_pack: id=%s",
316+
__FUNCTION__,
317+
idf58 (id));
315318
}
316319
if (queue_started (alloc->ctx->queue, job)) {
317320
if (event_job_action (ctx->event, job) < 0) {
318321
flux_log_error (h,
319-
"event_job_action id=%ju on alloc cancel",
320-
(uintmax_t)id);
322+
"event_job_action id=%s on alloc cancel",
323+
idf58 (id));
321324
goto teardown;
322325
}
323326
}

src/modules/job-manager/event.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "src/common/libeventlog/eventlog.h"
4545
#include "src/common/libutil/errno_safe.h"
4646
#include "src/common/libutil/jpath.h"
47+
#include "src/common/libjob/idf58.h"
4748
#include "ccan/ptrint/ptrint.h"
4849
#include "ccan/str/str.h"
4950

@@ -212,8 +213,8 @@ static void event_batch_destroy (struct event_batch *batch)
212213
job->hold_events = 0;
213214
if (event_job_post_deferred (batch->event, job) < 0)
214215
flux_log_error (batch->event->ctx->h,
215-
"%ju: error posting deferred events",
216-
(uintmax_t) job->id);
216+
"%s: error posting deferred events",
217+
idf58 (job->id));
217218
}
218219
zlist_destroy (&batch->jobs);
219220
}
@@ -440,8 +441,8 @@ int event_job_action (struct event *event, struct job *job)
440441
if (purge_enqueue_job (ctx->purge, job) < 0) {
441442
flux_log (event->ctx->h,
442443
LOG_ERR,
443-
"%ju: error adding inactive job to purge queue",
444-
(uintmax_t)job->id);
444+
"%s: error adding inactive job to purge queue",
445+
idf58 (job->id));
445446
}
446447
}
447448
(void) jobtap_call (ctx->jobtap, job, "job.destroy", NULL);
@@ -802,9 +803,9 @@ static int event_jobtap_call (struct event *event,
802803
"{s:O}",
803804
"entry", entry) < 0)
804805
flux_log (event->ctx->h, LOG_ERR,
805-
"jobtap: event.%s callback failed for job %ju",
806+
"jobtap: event.%s callback failed for job %s",
806807
name,
807-
(uintmax_t) job->id);
808+
idf58 (job->id));
808809

809810
if (job->state != old_state) {
810811
/*

src/modules/job-manager/jobtap.c

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "src/common/libutil/errno_safe.h"
3131
#include "src/common/libutil/errprintf.h"
3232
#include "src/common/libutil/aux.h"
33+
#include "src/common/libjob/idf58.h"
3334
#include "ccan/str/str.h"
3435

3536
#include "annotate.h"
@@ -190,8 +191,8 @@ static int plugin_check_dependencies (struct jobtap *jobtap,
190191
if (dependencies_unpack (jobtap, job, &error, &dependencies) < 0) {
191192
flux_log (jobtap->ctx->h,
192193
LOG_ERR,
193-
"id=%ju: plugin_register_dependencies: %s",
194-
(uintmax_t) job->id,
194+
"id=%s: plugin_register_dependencies: %s",
195+
idf58 (job->id),
195196
error);
196197
free (error);
197198
return -1;
@@ -280,8 +281,8 @@ static flux_plugin_t * jobtap_load_plugin (struct jobtap *jobtap,
280281
if (job->state == FLUX_JOB_STATE_DEPEND) {
281282
if (plugin_check_dependencies (jobtap, p, job, args) < 0)
282283
errprintf (errp,
283-
"failed to check dependencies for job %ju",
284-
job->id);
284+
"failed to check dependencies for job %s",
285+
idf58 (job->id));
285286
(void) flux_plugin_call (p, "job.state.depend", args);
286287
}
287288

@@ -696,8 +697,8 @@ int jobtap_get_priority (struct jobtap *jobtap,
696697
*/
697698
if (job->state == FLUX_JOB_STATE_SCHED)
698699
flux_log (jobtap->ctx->h, LOG_ERR,
699-
"jobtap: %ju: BUG: plugin didn't return priority",
700-
(uintmax_t) job->id);
700+
"jobtap: %s: BUG: plugin didn't return priority",
701+
idf58 (job->id));
701702
}
702703
/*
703704
* O/w, plugin provided a new priority.
@@ -727,8 +728,8 @@ static void error_asprintf (struct jobtap *jobtap,
727728
va_start (ap, fmt);
728729
if (vasprintf (errp, fmt, ap) < 0)
729730
flux_log_error (jobtap->ctx->h,
730-
"id=%ju: failed to create error string: fmt=%s",
731-
(uintmax_t) job->id, fmt);
731+
"id=%s: failed to create error string: fmt=%s",
732+
idf58 (job->id), fmt);
732733
va_end (ap);
733734
}
734735

@@ -955,8 +956,8 @@ int jobtap_check_dependencies (struct jobtap *jobtap,
955956
"%s (job may be stuck in DEPEND state)",
956957
*errp) < 0)
957958
flux_log_error (jobtap->ctx->h,
958-
"id=%ju: failed to raise dependency exception",
959-
(uintmax_t) job->id);
959+
"id=%s: failed to raise dependency exception",
960+
idf58 (job->id));
960961
free (*errp);
961962
*errp = NULL;
962963
}
@@ -984,9 +985,9 @@ int jobtap_notify_subscribers (struct jobtap *jobtap,
984985

985986
if (snprintf (topic, topiclen, "job.event.%s", name) >= topiclen) {
986987
flux_log (jobtap->ctx->h, LOG_ERR,
987-
"jobtap: %s: %ju: event topic name too long",
988+
"jobtap: %s: %s: event topic name too long",
988989
name,
989-
(uintmax_t) job->id);
990+
idf58 (job->id));
990991
return -1;
991992
}
992993

@@ -995,9 +996,9 @@ int jobtap_notify_subscribers (struct jobtap *jobtap,
995996
va_end (ap);
996997
if (!args) {
997998
flux_log (jobtap->ctx->h, LOG_ERR,
998-
"jobtap: %s: %ju: failed to create plugin args",
999+
"jobtap: %s: %s: failed to create plugin args",
9991000
topic,
1000-
(uintmax_t) job->id);
1001+
idf58 (job->id));
10011002
return -1;
10021003
}
10031004

@@ -1024,9 +1025,9 @@ int jobtap_call (struct jobtap *jobtap,
10241025
va_start (ap, fmt);
10251026
if (!(args = jobtap_args_vcreate (jobtap, job, fmt, ap))) {
10261027
flux_log (jobtap->ctx->h, LOG_ERR,
1027-
"jobtap: %s: %ju: failed to create plugin args",
1028+
"jobtap: %s: %s: failed to create plugin args",
10281029
topic,
1029-
(uintmax_t) job->id);
1030+
idf58 (job->id));
10301031
}
10311032
va_end (ap);
10321033

@@ -1071,9 +1072,9 @@ int jobtap_call (struct jobtap *jobtap,
10711072
rc = annotations_update_and_publish (jobtap->ctx, job, note);
10721073
if (rc < 0)
10731074
flux_log_error (jobtap->ctx->h,
1074-
"jobtap: %s: %ju: annotations_update",
1075+
"jobtap: %s: %s: annotations_update",
10751076
topic,
1076-
(uintmax_t) job->id);
1077+
idf58 (job->id));
10771078
}
10781079
if (priority >= FLUX_JOB_PRIORITY_MIN) {
10791080
/*

src/modules/job-manager/plugins/alloc-bypass.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <flux/jobtap.h>
2525

2626
#include "src/common/librlist/rlist.h"
27+
#include "src/common/libjob/idf58.h"
2728

2829
static void alloc_continuation (flux_future_t *f, void *arg)
2930
{
@@ -56,8 +57,8 @@ static void alloc_continuation (flux_future_t *f, void *arg)
5657
*/
5758
if (flux_jobtap_job_aux_set (p, *idptr, "alloc-bypass::free", p, NULL) < 0)
5859
flux_log_error (flux_jobtap_get_flux (p),
59-
"id=%ju: Failed to set alloc-bypass::free",
60-
*idptr);
60+
"id=%s: Failed to set alloc-bypass::free",
61+
idf58 (*idptr));
6162

6263
done:
6364
flux_future_destroy (f);

src/modules/job-manager/plugins/begin-time.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include <flux/core.h>
2121
#include <flux/jobtap.h>
2222

23+
#include "src/common/libjob/idf58.h"
24+
2325
struct begin_time_arg {
2426
flux_plugin_t *p;
2527
flux_watcher_t *w;
@@ -94,7 +96,7 @@ static int add_begin_time (flux_plugin_t *p,
9496
flux_watcher_start (arg->w);
9597

9698
if (flux_jobtap_dependency_add (p, id, arg->desc) < 0) {
97-
flux_log_error (h, "%ju: flux_jobtap_dependency_add", (uintmax_t) id);
99+
flux_log_error (h, "%s: flux_jobtap_dependency_add", idf58 (id));
98100
goto error;
99101
}
100102

src/modules/job-manager/plugins/dependency-after.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <flux/jobtap.h>
2121

2222
#include "src/common/libutil/iterators.h"
23+
#include "src/common/libjob/idf58.h"
2324
#include "src/common/libczmqcontainers/czmq_containers.h"
2425
#include "ccan/str/str.h"
2526

@@ -413,8 +414,8 @@ static int dependency_after_cb (flux_plugin_t *p,
413414
&& flux_jobtap_job_subscribe (p, afterid) < 0) {
414415
after_info_destroy (after);
415416
after_ref_destroy (ref);
416-
return flux_jobtap_reject_job (p, args, "failed to subscribe to %ju",
417-
(uintmax_t) id);
417+
return flux_jobtap_reject_job (p, args, "failed to subscribe to %s",
418+
idf58 (id));
418419
}
419420

420421
return 0;
@@ -437,8 +438,8 @@ static void remove_jobid_dependency (flux_plugin_t *p,
437438
"Failed to remove dependency %s",
438439
after->description) < 0) {
439440
flux_log_error (flux_jobtap_get_flux (p),
440-
"flux_jobtap_raise_exception: id=%ju",
441-
(uintmax_t) after->depid);
441+
"flux_jobtap_raise_exception: id=%s",
442+
idf58 (after->depid));
442443
}
443444
}
444445
}
@@ -486,8 +487,8 @@ static void raise_exceptions (flux_plugin_t *p, zlistx_t *l)
486487
"dependency",
487488
after->description) < 0)
488489
flux_log_error (flux_jobtap_get_flux (p),
489-
"id=%ju: unable to raise exception for %s",
490-
(uintmax_t) after->depid,
490+
"id=%s: unable to raise exception for %s",
491+
idf58 (after->depid),
491492
after->description);
492493
}
493494
/* N.B. = entry will be deleted at list destruction */

0 commit comments

Comments
 (0)