Skip to content

Commit b131566

Browse files
authored
fix(backend) fix run retry for argo (kubeflow#11585)
Signed-off-by: arpechenin <[email protected]>
1 parent 87498e8 commit b131566

File tree

2 files changed

+27
-9
lines changed

2 files changed

+27
-9
lines changed

backend/src/apiserver/resource/resource_manager_util_test.go

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,8 @@ status:
134134
assert.Nil(t, err)
135135
newWf, nodes, err := workflow.GenerateRetryExecution()
136136

137-
newWfString, err := yaml.Marshal(newWf)
137+
newWfYaml, err := yaml.Marshal(newWf)
138+
actualNewWfString := string(newWfYaml)
138139
assert.Nil(t, err)
139140
assert.Equal(t, []string{"resubmit-hl9ft-random-fail-3879090716"}, nodes)
140141

@@ -189,6 +190,9 @@ spec:
189190
name: random-fail
190191
outputs: {}
191192
status:
193+
conditions:
194+
- status: "False"
195+
type: Completed
192196
finishedAt: null
193197
nodes:
194198
resubmit-hl9ft:
@@ -202,10 +206,8 @@ status:
202206
startedAt: "2021-05-26T09:14:07Z"
203207
templateName: rand-fail-dag
204208
type: DAG
205-
resubmit-hl9ft-random-fail-3929423573:
209+
resubmit-hl9ft-3929423573:
206210
boundaryID: resubmit-hl9ft
207-
children:
208-
- resubmit-hl9ft-3879090716
209211
displayName: A
210212
finishedAt: "2021-05-26T09:14:11Z"
211213
id: resubmit-hl9ft-3929423573
@@ -218,5 +220,5 @@ status:
218220
startedAt: "2021-05-26T09:14:07Z"
219221
`
220222

221-
assert.Equal(t, expectedNewWfString, string(newWfString))
223+
assert.Equal(t, expectedNewWfString, actualNewWfString)
222224
}

backend/src/common/util/workflow.go

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,7 @@ func (w *Workflow) GenerateRetryExecution() (ExecutionSpec, []string, error) {
207207
delete(newWF.Labels, LabelKeyWorkflowPersistedFinalState)
208208
newWF.ObjectMeta.Labels[common.LabelKeyPhase] = string(workflowapi.NodeRunning)
209209
newWF.Status.Phase = workflowapi.WorkflowRunning
210+
newWF.Status.Conditions.UpsertCondition(workflowapi.Condition{Status: metav1.ConditionFalse, Type: workflowapi.ConditionTypeCompleted})
210211
newWF.Status.Message = ""
211212
newWF.Status.FinishedAt = metav1.Time{}
212213
if newWF.Spec.ActiveDeadlineSeconds != nil && *newWF.Spec.ActiveDeadlineSeconds == 0 {
@@ -223,8 +224,7 @@ func (w *Workflow) GenerateRetryExecution() (ExecutionSpec, []string, error) {
223224
switch node.Phase {
224225
case workflowapi.NodeSucceeded, workflowapi.NodeSkipped:
225226
if !strings.HasPrefix(node.Name, onExitNodeName) {
226-
nodeName := RetrievePodName(*newWF, node)
227-
newWF.Status.Nodes[nodeName] = node
227+
newWF.Status.Nodes[node.ID] = node
228228
continue
229229
}
230230
case workflowapi.NodeError, workflowapi.NodeFailed, workflowapi.NodeOmitted:
@@ -233,8 +233,7 @@ func (w *Workflow) GenerateRetryExecution() (ExecutionSpec, []string, error) {
233233
newNode.Phase = workflowapi.NodeRunning
234234
newNode.Message = ""
235235
newNode.FinishedAt = metav1.Time{}
236-
nodeName := RetrievePodName(*newWF, *newNode)
237-
newWF.Status.Nodes[nodeName] = *newNode
236+
newWF.Status.Nodes[node.ID] = *newNode
238237
continue
239238
}
240239
// do not add this status to the node. pretend as if this node never existed.
@@ -248,6 +247,23 @@ func (w *Workflow) GenerateRetryExecution() (ExecutionSpec, []string, error) {
248247
podsToDelete = append(podsToDelete, oldNodeID)
249248
}
250249
}
250+
for _, node := range newWF.Status.Nodes {
251+
var children []string
252+
for _, child := range node.Children {
253+
if _, ok := newWF.Status.Nodes[child]; ok {
254+
children = append(children, child)
255+
}
256+
}
257+
var outboundNodes []string
258+
for _, outboundNode := range node.OutboundNodes {
259+
if _, ok := newWF.Status.Nodes[outboundNode]; ok {
260+
outboundNodes = append(outboundNodes, outboundNode)
261+
}
262+
}
263+
node.Children = children
264+
node.OutboundNodes = outboundNodes
265+
newWF.Status.Nodes[node.ID] = node
266+
}
251267
return NewWorkflow(newWF), podsToDelete, nil
252268
}
253269

0 commit comments

Comments
 (0)