Skip to content

Commit a4c8b53

Browse files
feat: add preempt reason (#4640)
#### What type of PR is this? Feature #### What this PR does / why we need it Currently the reason provided when preempting a job does not get stored, and the response to the UI is hardcoded for preempted jobs. This change adds the reason to the scheduler db runs table and appends the user to the run reason provided. <img width="1427" height="341" alt="image" src="https://github.com/user-attachments/assets/6f06effa-083e-471b-8285-34a375bcf53e" /> --------- Signed-off-by: williamvega <williamvega1006@gmail.com> Co-authored-by: JamesMurkin <jamesmurkin@hotmail.com>
1 parent 75711ed commit a4c8b53

File tree

19 files changed

+115
-39
lines changed

19 files changed

+115
-39
lines changed

internal/common/database/upsert.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func Upsert[T any](ctx *armadacontext.Context, tx pgx.Tx, tableName string, reco
130130
// Move those rows into the main table, using ON CONFLICT rules to over-write existing rows.
131131
var b strings.Builder
132132

133-
fmt.Fprintf(&b, "INSERT INTO %s SELECT %s from %s ", tableName, strings.Join(writableNames, ","), tempTableName)
133+
fmt.Fprintf(&b, "INSERT INTO %s (%s) SELECT %s from %s ", tableName, strings.Join(writableNames, ","), strings.Join(writableNames, ","), tempTableName)
134134
fmt.Fprintf(&b, "ON CONFLICT (%s) DO UPDATE SET ", writableNames[0])
135135
for i, name := range writableNames {
136136
fmt.Fprintf(&b, "%s = EXCLUDED.%s", name, name)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE runs ADD COLUMN preempt_reason TEXT;

internal/scheduler/database/models.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/scheduler/database/query.sql.go

Lines changed: 17 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/scheduler/database/query/query.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ SELECT run_id FROM runs;
5353
SELECT * FROM runs WHERE serial > $1 AND job_id = ANY(sqlc.arg(job_ids)::text[]) ORDER BY serial;
5454

5555
-- name: MarkJobRunsPreemptRequestedByJobId :exec
56-
UPDATE runs SET preempt_requested = true WHERE queue = sqlc.arg(queue) and job_set = sqlc.arg(job_set) and job_id = ANY(sqlc.arg(job_ids)::text[]) and terminated = false;
56+
UPDATE runs SET preempt_requested = true, preempt_reason = sqlc.arg(preempt_reason) WHERE queue = sqlc.arg(queue) and job_set = sqlc.arg(job_set) and job_id = ANY(sqlc.arg(job_ids)::text[]) and terminated = false;
5757

5858
-- name: MarkJobRunsSucceededById :exec
5959
UPDATE runs SET succeeded = true WHERE run_id = ANY(sqlc.arg(run_ids)::text[]);

internal/scheduler/jobdb/job.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,7 @@ func (job *Job) WithNewRun(executor, nodeId, nodeName, pool string, scheduledAtP
784784
false,
785785
false,
786786
false,
787+
nil,
787788
false,
788789
false,
789790
false,

internal/scheduler/jobdb/job_run.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ type JobRun struct {
4646
runningTime *time.Time
4747
// True if a user has requested this run be preempted.
4848
preemptRequested bool
49+
// The reason provided when preemption was requested.
50+
preemptReason *string
4951
// True if the run has been reported as preempted by the executor.
5052
preempted bool
5153
// The time at which the run was reported as preempted by the executor.
@@ -224,6 +226,7 @@ func (jobDb *JobDb) CreateRun(
224226
pending bool,
225227
running bool,
226228
preemptRequested bool,
229+
preemptReason *string,
227230
preempted bool,
228231
succeeded bool,
229232
failed bool,
@@ -249,6 +252,7 @@ func (jobDb *JobDb) CreateRun(
249252
pending: pending,
250253
running: running,
251254
preemptRequested: preemptRequested,
255+
preemptReason: preemptReason,
252256
preempted: preempted,
253257
succeeded: succeeded,
254258
failed: failed,
@@ -434,6 +438,18 @@ func (run *JobRun) WithPreemptRequested(preemptRequested bool) *JobRun {
434438
return run
435439
}
436440

441+
// PreemptReason returns the reason provided when preemption was requested, or nil if no reason was provided.
442+
func (run *JobRun) PreemptReason() *string {
443+
return run.preemptReason
444+
}
445+
446+
// WithPreemptReason returns a copy of the job run with the preemptReason updated.
447+
func (run *JobRun) WithPreemptReason(preemptReason *string) *JobRun {
448+
run = run.DeepCopy()
449+
run.preemptReason = preemptReason
450+
return run
451+
}
452+
437453
// Preempted Returns true if the executor has reported the job run as preempted
438454
func (run *JobRun) Preempted() bool {
439455
return run.preempted

internal/scheduler/jobdb/job_run_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ var (
4040
makeTestResourceListFactory(),
4141
)
4242
scheduledAtPriority = int32(5)
43+
testPreemptReason = "test preempt reason"
4344
)
4445

4546
func init() {
@@ -59,6 +60,7 @@ var baseJobRun = jobDb.CreateRun(
5960
false,
6061
false,
6162
false,
63+
nil,
6264
false,
6365
false,
6466
false,
@@ -136,6 +138,7 @@ func TestDeepCopy(t *testing.T) {
136138
true,
137139
true,
138140
true,
141+
&testPreemptReason,
139142
true,
140143
true,
141144
true,
@@ -161,6 +164,7 @@ func TestDeepCopy(t *testing.T) {
161164
true,
162165
true,
163166
true,
167+
&testPreemptReason,
164168
true,
165169
true,
166170
true,

internal/scheduler/jobdb/reconciliation.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ func (jobDb *JobDb) reconcileRunDifferences(jobRun *JobRun, jobRepoRun *database
243243
rst.Running = true
244244
}
245245
if jobRepoRun.PreemptRequested && !jobRun.PreemptRequested() {
246-
jobRun = jobRun.WithPreemptRequested(true)
246+
jobRun = jobRun.WithPreemptRequested(true).WithPreemptReason(jobRepoRun.PreemptReason)
247247
rst.PreemptionRequested = true
248248
}
249249
if jobRepoRun.Preempted && !jobRun.Preempted() {
@@ -359,6 +359,7 @@ func (jobDb *JobDb) schedulerRunFromDatabaseRun(dbRun *database.Run) *JobRun {
359359
dbRun.Pending,
360360
dbRun.Running,
361361
dbRun.PreemptRequested,
362+
dbRun.PreemptReason,
362363
dbRun.Preempted,
363364
dbRun.Succeeded,
364365
dbRun.Failed,

internal/scheduler/scheduler.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1012,7 +1012,11 @@ func (s *Scheduler) generateUpdateMessagesFromJob(ctx *armadacontext.Context, jo
10121012
}
10131013
} else if lastRun.PreemptRequested() && job.PriorityClass().Preemptible {
10141014
job = job.WithQueued(false).WithFailed(true).WithUpdatedRun(lastRun.WithoutTerminal().WithFailed(true))
1015-
events = append(events, createEventsForPreemptedJob(job.Id(), lastRun.Id(), "Preempted - preemption requested via API", s.clock.Now())...)
1015+
reason := "Preempted - preemption requested via API"
1016+
if lastRun.PreemptReason() != nil && *lastRun.PreemptReason() != "" {
1017+
reason = *lastRun.PreemptReason()
1018+
}
1019+
events = append(events, createEventsForPreemptedJob(job.Id(), lastRun.Id(), reason, s.clock.Now())...)
10161020
s.metrics.ReportJobPreemptedWithType(job, schedulercontext.PreemptedViaApi)
10171021
}
10181022
}

0 commit comments

Comments
 (0)