Skip to content

Commit 076987a

Browse files
committed
Add kube_job_status_suspended metric
1 parent c87b507 commit 076987a

File tree

3 files changed

+65
-1
lines changed

3 files changed

+65
-1
lines changed

docs/metrics/workload/job-metrics.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@
1717
| kube_job_complete | Gauge | | `job_name`=&lt;job-name&gt; <br> `namespace`=&lt;job-namespace&gt; <br> `condition`=&lt;true\|false\|unknown&gt; | STABLE |
1818
| kube_job_failed | Gauge | | `job_name`=&lt;job-name&gt; <br> `namespace`=&lt;job-namespace&gt; <br> `condition`=&lt;true\|false\|unknown&gt; | STABLE |
1919
| kube_job_created | Gauge | | `job_name`=&lt;job-name&gt; <br> `namespace`=&lt;job-namespace&gt; | STABLE |
20+
| kube_job_status_suspended | Gauge | | `job_name`=&lt;job-name&gt; <br> `namespace`=&lt;job-namespace&gt; <br> `condition`=&lt;true\|false\|unknown&gt; | EXPERIMENTAL |

internal/store/job.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,30 @@ func jobMetricFamilies(allowAnnotationsList, allowLabelsList []string) []generat
355355
}
356356
}),
357357
),
358+
*generator.NewFamilyGeneratorWithStability(
359+
"kube_job_status_suspended",
360+
"The number of pods which reached Phase Suspended.",
361+
metric.Gauge,
362+
basemetrics.ALPHA,
363+
"",
364+
wrapJobFunc(func(j *v1batch.Job) *metric.Family {
365+
ms := []*metric.Metric{}
366+
for _, c := range j.Status.Conditions {
367+
if c.Type == v1batch.JobSuspended {
368+
metrics := addConditionMetrics(c.Status)
369+
for _, m := range metrics {
370+
metric := m
371+
metric.LabelKeys = []string{"condition"}
372+
ms = append(ms, metric)
373+
}
374+
}
375+
}
376+
377+
return &metric.Family{
378+
Metrics: ms,
379+
}
380+
}),
381+
),
358382
*generator.NewFamilyGeneratorWithStability(
359383
"kube_job_owner",
360384
"Information about the Job's owner.",

internal/store/job_test.go

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@ func TestJobStore(t *testing.T) {
7777
# HELP kube_job_status_start_time [STABLE] StartTime represents time when the job was acknowledged by the Job Manager.
7878
# TYPE kube_job_status_start_time gauge
7979
# HELP kube_job_status_succeeded [STABLE] The number of pods which reached Phase Succeeded.
80-
# TYPE kube_job_status_succeeded gauge`
80+
# TYPE kube_job_status_succeeded gauge
81+
# HELP kube_job_status_suspended The number of pods which reached Phase Suspended.
82+
# TYPE kube_job_status_suspended gauge
83+
`
8184

8285
cases := []generateMetricsTestCase{
8386
{
@@ -272,6 +275,42 @@ func TestJobStore(t *testing.T) {
272275
kube_job_status_failed{job_name="SuccessfulJob2NoActiveDeadlineSeconds",namespace="ns1"} 0
273276
kube_job_status_start_time{job_name="SuccessfulJob2NoActiveDeadlineSeconds",namespace="ns1"} 1.495800607e+09
274277
kube_job_status_succeeded{job_name="SuccessfulJob2NoActiveDeadlineSeconds",namespace="ns1"} 1
278+
`,
279+
},
280+
{
281+
Obj: &v1batch.Job{
282+
ObjectMeta: metav1.ObjectMeta{
283+
Name: "SuspendedNoActiveDeadlineSeconds",
284+
Namespace: "ns1",
285+
Generation: 1,
286+
},
287+
Status: v1batch.JobStatus{
288+
Active: 0,
289+
Failed: 0,
290+
Succeeded: 0,
291+
StartTime: &metav1.Time{Time: SuccessfulJob2StartTime},
292+
Conditions: []v1batch.JobCondition{
293+
{Type: v1batch.JobSuspended, Status: v1.ConditionTrue},
294+
},
295+
},
296+
Spec: v1batch.JobSpec{
297+
Suspend: &trueValue,
298+
Parallelism: &Parallelism1,
299+
Completions: &Completions1,
300+
},
301+
},
302+
Want: metadata + `
303+
kube_job_owner{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1",owner_is_controller="",owner_kind="",owner_name=""} 1
304+
kube_job_info{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
305+
kube_job_spec_completions{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
306+
kube_job_spec_parallelism{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
307+
kube_job_status_active{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
308+
kube_job_status_failed{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
309+
kube_job_status_start_time{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1.495800607e+09
310+
kube_job_status_succeeded{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
311+
kube_job_status_suspended{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1",condition="false"} 0
312+
kube_job_status_suspended{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1",condition="true"} 1
313+
kube_job_status_suspended{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1",condition="unknown"} 0
275314
`,
276315
},
277316
}

0 commit comments

Comments
 (0)