Skip to content

Commit 409b331

Browse files
fao89 and bshephar committed
Fix ansible job error reason
This change fixes a nil pointer dereference by reporting the job as failed only once its failure count exceeds the defined BackoffLimit. Because our logic already determines whether the BackoffLimit has been exceeded, there is no need to additionally check condition.Reason, which would tell us the same thing. We can simply infer from our check that the job has failed because the BackoffLimit was reached.

Closes: OSPRH-11068

Signed-off-by: Fabricio Aguiar <[email protected]>
Co-Authored-by: Brendan Shephard <[email protected]>
1 parent 1276a85 commit 409b331

File tree

1 file changed

+17
-18
lines changed

1 file changed

+17
-18
lines changed

pkg/dataplane/deployment.go

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -195,47 +195,39 @@ func (d *Deployer) ConditionalDeploy(
195195

196196
}
197197

198-
var ansibleCondition *batchv1.JobCondition
198+
var ansibleCondition batchv1.JobCondition
199199
if nsConditions.IsFalse(readyCondition) {
200-
var ansibleEE *batchv1.Job
200+
var ansibleJob *batchv1.Job
201201
_, labelSelector := dataplaneutil.GetAnsibleExecutionNameAndLabels(&foundService, d.Deployment.Name, d.NodeSet.Name)
202-
ansibleEE, err = dataplaneutil.GetAnsibleExecution(d.Ctx, d.Helper, d.Deployment, labelSelector)
202+
ansibleJob, err = dataplaneutil.GetAnsibleExecution(d.Ctx, d.Helper, d.Deployment, labelSelector)
203203
if err != nil {
204204
// Return nil if we don't have AnsibleEE available yet
205205
if k8s_errors.IsNotFound(err) {
206206
log.Info(fmt.Sprintf("%s AnsibleEE job is not yet found", readyCondition))
207207
return nil
208208
}
209-
log.Error(err, fmt.Sprintf("Error getting ansibleEE job for %s", deployName))
209+
log.Error(err, fmt.Sprintf("Error getting ansibleJob job for %s", deployName))
210210
nsConditions.Set(condition.FalseCondition(
211211
readyCondition,
212212
condition.ErrorReason,
213213
condition.SeverityError,
214214
readyErrorMessage,
215215
err.Error()))
216216
}
217-
218-
if ansibleEE.Status.Succeeded > 0 {
217+
if ansibleJob.Status.Succeeded > 0 {
219218
log.Info(fmt.Sprintf("Condition %s ready", readyCondition))
220219
nsConditions.Set(condition.TrueCondition(
221220
readyCondition,
222221
readyMessage))
223-
} else if ansibleEE.Status.Active > 0 {
224-
log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d", ansibleEE.Name, ansibleEE.Status.Active))
225-
nsConditions.Set(condition.FalseCondition(
226-
readyCondition,
227-
condition.RequestedReason,
228-
condition.SeverityInfo,
229-
readyWaitingMessage))
230-
} else if ansibleEE.Status.Failed > 0 {
231-
errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s failed pods: %d", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.Failed)
232-
for _, condition := range ansibleEE.Status.Conditions {
222+
} else if ansibleJob.Status.Failed > *ansibleJob.Spec.BackoffLimit {
223+
errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s failed pods: %d", ansibleJob.Name, ansibleJob.Namespace, ansibleJob.Status.Failed)
224+
for _, condition := range ansibleJob.Status.Conditions {
233225
if condition.Type == batchv1.JobFailed {
234-
ansibleCondition = &condition
226+
ansibleCondition = condition
235227
}
236228
}
237229
if ansibleCondition.Reason == condition.JobReasonBackoffLimitExceeded {
238-
errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleCondition.Message)
230+
errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleJob.Name, ansibleJob.Namespace, ansibleCondition.Message)
239231
}
240232
log.Info(fmt.Sprintf("Condition %s error", readyCondition))
241233
err = fmt.Errorf(errorMsg)
@@ -245,6 +237,13 @@ func (d *Deployer) ConditionalDeploy(
245237
condition.SeverityError,
246238
readyErrorMessage,
247239
err.Error()))
240+
} else {
241+
log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d, Failed pods: %d", ansibleJob.Name, ansibleJob.Status.Active, ansibleJob.Status.Failed))
242+
nsConditions.Set(condition.FalseCondition(
243+
readyCondition,
244+
condition.RequestedReason,
245+
condition.SeverityInfo,
246+
readyWaitingMessage))
248247
}
249248
}
250249
d.Status.NodeSetConditions[d.NodeSet.Name] = nsConditions

0 commit comments

Comments
 (0)