Skip to content

Commit af0c5a8

Browse files
committed
Fix DeploymentController to comply with OpenShift Available API contract
1 parent 14a789e commit af0c5a8

File tree

2 files changed

+112
-5
lines changed

2 files changed

+112
-5
lines changed

pkg/operator/deploymentcontroller/deployment_controller.go

Lines changed: 108 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,20 +268,33 @@ func (c *DeploymentController) syncManaged(ctx context.Context, opSpec *opv1.Ope
268268
})
269269

270270
// Set Available condition
271+
// Per OpenShift API contract (config/v1/types_cluster_operator.go:156):
272+
// "A component must not report Available=False during the course of a normal upgrade."
273+
// Available should only be False when the deployment has actually failed, not during
274+
// normal rolling updates or when temporarily waiting for pods to become ready.
271275
if slices.Contains(c.conditions, opv1.OperatorStatusTypeAvailable) {
272276
availableCondition := applyoperatorv1.
273277
OperatorCondition().WithType(c.instanceName + opv1.OperatorStatusTypeAvailable)
278+
274279
if deployment.Status.AvailableReplicas > 0 {
280+
// Deployment has available replicas - clearly available
275281
availableCondition = availableCondition.
276282
WithStatus(opv1.ConditionTrue).
277283
WithMessage("Deployment is available").
278284
WithReason("AsExpected")
279-
280-
} else {
285+
} else if isDeploymentAvailableDuringRollout(deployment) {
286+
// Deployment is progressing normally (rolling update, initial deployment)
287+
// Per API contract, remain Available=True during normal operations
281288
availableCondition = availableCondition.
282-
WithStatus(opv1.ConditionFalse).
289+
WithStatus(opv1.ConditionTrue).
283290
WithMessage("Waiting for Deployment").
284291
WithReason("Deploying")
292+
} else {
293+
// Deployment appears to have failed - no available replicas and not progressing normally
294+
availableCondition = availableCondition.
295+
WithStatus(opv1.ConditionFalse).
296+
WithMessage("Deployment has failed").
297+
WithReason("DeploymentFailed")
285298
}
286299
status = status.WithConditions(availableCondition)
287300
}
@@ -389,3 +402,95 @@ func hasFinishedProgressing(deployment *appsv1.Deployment) bool {
389402
}
390403
return false
391404
}
405+
406+
// isDeploymentAvailableDuringRollout determines if a deployment should be considered Available
407+
// even when it has zero AvailableReplicas. This is true when the deployment is actively
408+
// progressing (rolling update, scaling, initial deployment) and has not failed.
409+
//
410+
// Per OpenShift API contract (config/v1/types_cluster_operator.go:156):
411+
// "A component must not report Available=False during the course of a normal upgrade."
412+
// Available should remain True during normal operations like upgrades, but should go False
413+
// when the deployment has actually failed or when pods disappear after successful deployment.
414+
func isDeploymentAvailableDuringRollout(deployment *appsv1.Deployment) bool {
415+
// First, check for hard failures
416+
for _, cond := range deployment.Status.Conditions {
417+
// ProgressDeadlineExceeded means the deployment has failed
418+
if cond.Type == appsv1.DeploymentProgressing &&
419+
cond.Status == corev1.ConditionFalse &&
420+
cond.Reason == "ProgressDeadlineExceeded" {
421+
return false
422+
}
423+
// ReplicaFailure means pods are failing to start
424+
if cond.Type == appsv1.DeploymentReplicaFailure && cond.Status == corev1.ConditionTrue {
425+
return false
426+
}
427+
}
428+
429+
// Check if deployment is actively being updated (spec change being rolled out)
430+
// This is the primary indicator of a "normal upgrade" in progress
431+
if deployment.Generation != deployment.Status.ObservedGeneration {
432+
// Spec has changed, deployment controller is working on rolling it out
433+
// Per API contract, remain Available during this normal upgrade
434+
return true
435+
}
436+
437+
// Check if we're in an active rollout (not just missing pods after successful deployment)
438+
var isProgressing bool
439+
var isRollingOut bool
440+
var hasFinishedRollout bool
441+
442+
for _, cond := range deployment.Status.Conditions {
443+
if cond.Type == appsv1.DeploymentProgressing {
444+
if cond.Status == corev1.ConditionTrue {
445+
isProgressing = true
446+
// ReplicaSetUpdated means we're actively rolling out new pods
447+
if cond.Reason == "ReplicaSetUpdated" {
448+
isRollingOut = true
449+
}
450+
}
451+
// NewReplicaSetAvailable means rollout completed successfully
452+
if cond.Status == corev1.ConditionTrue && cond.Reason == "NewReplicaSetAvailable" {
453+
hasFinishedRollout = true
454+
}
455+
}
456+
}
457+
458+
// If we're actively rolling out, remain Available per API contract
459+
if isRollingOut {
460+
return true
461+
}
462+
463+
// If rollout has finished successfully (NewReplicaSetAvailable) but pods are now missing,
464+
// this is NOT a normal upgrade - it's an operational issue (node failure, etc.)
465+
// In this case, we should report Available=False
466+
if hasFinishedRollout {
467+
return false
468+
}
469+
470+
// If we have updated replicas being created, we're in a rollout
471+
var expectedReplicas int32
472+
if deployment.Spec.Replicas != nil {
473+
expectedReplicas = *deployment.Spec.Replicas
474+
}
475+
476+
if deployment.Status.UpdatedReplicas > 0 && deployment.Status.UpdatedReplicas <= expectedReplicas {
477+
// We have updated replicas being created, this is a normal rollout
478+
return true
479+
}
480+
481+
// If progressing but no updated replicas yet, we're in early stages of rollout
482+
if isProgressing {
483+
return true
484+
}
485+
486+
// Special case: brand new deployment with no status conditions yet
487+
// This happens during initial deployment before Kubernetes has had a chance to update status
488+
if len(deployment.Status.Conditions) == 0 && deployment.Status.ObservedGeneration == 0 {
489+
// Deployment just created, is progressing normally
490+
return true
491+
}
492+
493+
// If we get here with zero replicas and no signs of progress or rollout,
494+
// this is likely a failure or pods disappeared after deployment
495+
return false
496+
}

pkg/operator/deploymentcontroller/deployment_controller_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,8 @@ func TestSync(t *testing.T) {
470470
operator: makeFakeOperatorInstance(
471471
withGenerations(1),
472472
withTrueConditions(conditionProgressing),
473-
withFalseConditions(conditionAvailable),
473+
// Per OpenShift API contract, Available remains True during normal upgrades/deployments
474+
withTrueConditions(conditionAvailable),
474475
withFinalizers(finalizerName),
475476
),
476477
},
@@ -526,7 +527,8 @@ func TestSync(t *testing.T) {
526527
// withStatus(replica0),
527528
withGenerations(1),
528529
withTrueConditions(conditionProgressing),
529-
withFalseConditions(conditionAvailable)), // Degraded is set later on
530+
// Per OpenShift API contract, Available remains True during normal upgrades/deployments
531+
withTrueConditions(conditionAvailable)),
530532
},
531533
},
532534
{

0 commit comments

Comments
 (0)