Skip to content

Commit 2c3ac70

Browse files
committed
fix: Missing duration metrics if the controller restarts (argoproj#6815)
Signed-off-by: Saravanan Balasubramanian <[email protected]>
1 parent a87e94b commit 2c3ac70

File tree

2 files changed

+155
-6
lines changed

2 files changed

+155
-6
lines changed

workflow/controller/operator.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,13 @@ func (woc *wfOperationCtx) operate(ctx context.Context) {
254254
woc.preExecutionNodePhases[node.ID] = node.Phase
255255
}
256256

257+
if woc.execWf.Spec.Metrics != nil {
258+
realTimeScope := map[string]func() float64{common.GlobalVarWorkflowDuration: func() float64 {
259+
return time.Since(woc.wf.Status.StartedAt.Time).Seconds()
260+
}}
261+
woc.computeMetrics(woc.execWf.Spec.Metrics.Prometheus, woc.globalParams, realTimeScope, true)
262+
}
263+
257264
if woc.wf.Status.Phase == wfv1.WorkflowUnknown {
258265
woc.markWorkflowRunning(ctx)
259266
err := woc.createPDBResource(ctx)
@@ -271,12 +278,6 @@ func (woc *wfOperationCtx) operate(ctx context.Context) {
271278
woc.requeueAfter(time.Until(*woc.workflowDeadline))
272279
}
273280

274-
if woc.execWf.Spec.Metrics != nil {
275-
realTimeScope := map[string]func() float64{common.GlobalVarWorkflowDuration: func() float64 {
276-
return time.Since(woc.wf.Status.StartedAt.Time).Seconds()
277-
}}
278-
woc.computeMetrics(woc.execWf.Spec.Metrics.Prometheus, woc.globalParams, realTimeScope, true)
279-
}
280281
woc.wf.Status.EstimatedDuration = woc.estimateWorkflowDuration()
281282
} else {
282283
woc.workflowDeadline = woc.getWorkflowDeadline()

workflow/controller/operator_metrics_test.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,3 +586,151 @@ func TestProcessedRetryNode(t *testing.T) {
586586
assert.NoError(t, err)
587587
assert.Contains(t, metricErrorCounterString, `value:1`)
588588
}
589+
590+
// suspendWfWithMetrics is a Workflow manifest used as a fixture by
// TestControllerRestartWithRunningWorkflow. Its spec declares one realtime
// Prometheus gauge, exec_duration_gauge, whose value is
// '{{workflow.duration}}' and which carries the label name=model_a. Its status
// already records the workflow as Running (started 2021-09-28T12:23:10Z), with
// the "build" step Succeeded and the "approve" Suspend node still Running.
var suspendWfWithMetrics = `apiVersion: argoproj.io/v1alpha1
591+
kind: Workflow
592+
metadata:
593+
name: suspend-template-qndm5
594+
spec:
595+
entrypoint: suspend
596+
metrics:
597+
prometheus:
598+
- gauge:
599+
realtime: true
600+
value: '{{workflow.duration}}'
601+
help: Duration gauge by name
602+
labels:
603+
- key: name
604+
value: model_a
605+
name: exec_duration_gauge
606+
templates:
607+
- name: suspend
608+
steps:
609+
- - name: build
610+
template: whalesay
611+
- - name: approve
612+
template: approve
613+
- - name: delay
614+
template: delay
615+
- - name: release
616+
template: whalesay
617+
- name: approve
618+
suspend: {}
619+
- name: delay
620+
suspend:
621+
duration: "20"
622+
- container:
623+
args:
624+
- hello world
625+
command:
626+
- cowsay
627+
image: docker/whalesay
628+
name: ""
629+
name: whalesay
630+
ttlStrategy:
631+
secondsAfterCompletion: 600
632+
status:
633+
conditions:
634+
- status: "False"
635+
type: PodRunning
636+
finishedAt: null
637+
nodes:
638+
suspend-template-qndm5:
639+
children:
640+
- suspend-template-qndm5-343839516
641+
displayName: suspend-template-qndm5
642+
finishedAt: null
643+
id: suspend-template-qndm5
644+
name: suspend-template-qndm5
645+
phase: Running
646+
progress: 1/1
647+
startedAt: "2021-09-28T12:23:10Z"
648+
templateName: suspend
649+
templateScope: local/suspend-template-qndm5
650+
type: Steps
651+
suspend-template-qndm5-343839516:
652+
boundaryID: suspend-template-qndm5
653+
children:
654+
- suspend-template-qndm5-2823755246
655+
displayName: '[0]'
656+
finishedAt: "2021-09-28T12:23:20Z"
657+
id: suspend-template-qndm5-343839516
658+
name: suspend-template-qndm5[0]
659+
phase: Succeeded
660+
progress: 1/1
661+
resourcesDuration:
662+
cpu: 6
663+
memory: 3
664+
startedAt: "2021-09-28T12:23:10Z"
665+
templateScope: local/suspend-template-qndm5
666+
type: StepGroup
667+
suspend-template-qndm5-2823755246:
668+
boundaryID: suspend-template-qndm5
669+
children:
670+
- suspend-template-qndm5-3632002577
671+
displayName: build
672+
finishedAt: "2021-09-28T12:23:16Z"
673+
hostNodeName: kind-control-plane
674+
id: suspend-template-qndm5-2823755246
675+
name: suspend-template-qndm5[0].build
676+
outputs:
677+
exitCode: "0"
678+
phase: Succeeded
679+
progress: 1/1
680+
resourcesDuration:
681+
cpu: 6
682+
memory: 3
683+
startedAt: "2021-09-28T12:23:10Z"
684+
templateName: whalesay
685+
templateScope: local/suspend-template-qndm5
686+
type: Pod
687+
suspend-template-qndm5-3456849218:
688+
boundaryID: suspend-template-qndm5
689+
displayName: approve
690+
finishedAt: null
691+
id: suspend-template-qndm5-3456849218
692+
name: suspend-template-qndm5[1].approve
693+
phase: Running
694+
startedAt: "2021-09-28T12:23:20Z"
695+
templateName: approve
696+
templateScope: local/suspend-template-qndm5
697+
type: Suspend
698+
suspend-template-qndm5-3632002577:
699+
boundaryID: suspend-template-qndm5
700+
children:
701+
- suspend-template-qndm5-3456849218
702+
displayName: '[1]'
703+
finishedAt: null
704+
id: suspend-template-qndm5-3632002577
705+
name: suspend-template-qndm5[1]
706+
phase: Running
707+
startedAt: "2021-09-28T12:23:20Z"
708+
templateScope: local/suspend-template-qndm5
709+
type: StepGroup
710+
phase: Running
711+
progress: 1/1
712+
resourcesDuration:
713+
cpu: 6
714+
memory: 3
715+
startedAt: "2021-09-28T12:23:10Z"
716+
`
717+
718+
func TestControllerRestartWithRunningWorkflow(t *testing.T) {
719+
cancel, controller := newController()
720+
defer cancel()
721+
ctx := context.Background()
722+
wfcset := controller.wfclientset.ArgoprojV1alpha1().Workflows("")
723+
wf := v1alpha1.MustUnmarshalWorkflow(suspendWfWithMetrics)
724+
_, err := wfcset.Create(ctx, wf, metav1.CreateOptions{})
725+
assert.NoError(t, err)
726+
woc := newWorkflowOperationCtx(wf, controller)
727+
728+
woc.operate(ctx)
729+
metricDesc := wf.Spec.Metrics.Prometheus[0].GetDesc()
730+
metric := controller.metrics.GetCustomMetric(metricDesc)
731+
assert.NotNil(t, metric)
732+
metricString, err := getMetricStringValue(metric)
733+
fmt.Println(metricString)
734+
assert.NoError(t, err)
735+
assert.Contains(t, metricString, `model_a`)
736+
}

0 commit comments

Comments
 (0)