Skip to content

Commit 0442094

Browse files
committed
lib/resourcebuilder/apps: Only error on Deployment Available=False *and* Progressing=False
Available=True, Progressing=False is the happy, steady state. Available=True, Progressing=True is a happy update. Available=False, Progressing=True is an acceptable outage, e.g. during an update with the Recreate strategy [1]:

  $ curl -s https://storage.googleapis.com/origin-ci-test/logs/release-openshift-origin-installer-e2e-gcp-upgrade-4.6/1291426211527921664/artifacts/e2e-gcp-upgrade/container-logs/test.log | grep MinimumReplicasUnavailable | head -n1
  Aug 6 17:56:00.674: INFO: deployment status: v1.DeploymentStatus{ObservedGeneration:1, Replicas:1, UpdatedReplicas:1, ReadyReplicas:0, AvailableReplicas:0, UnavailableReplicas:1, Conditions:[]v1.DeploymentCondition{v1.DeploymentCondition{Type:"Available", Status:"False", LastUpdateTime:v1.Time{Time:time.Time{wall:0x0, ext:63732333358, loc:(*time.Location)(0x9e74040)}}, LastTransitionTime:v1.Time{Time:time.Time{wall:0x0, ext:63732333358, loc:(*time.Location)(0x9e74040)}}, Reason:"MinimumReplicasUnavailable", Message:"Deployment does not have minimum availability."}, v1.DeploymentCondition{Type:"Progressing", Status:"True", LastUpdateTime:v1.Time{Time:time.Time{wall:0x0, ext:63732333358, loc:(*time.Location)(0x9e74040)}}, LastTransitionTime:v1.Time{Time:time.Time{wall:0x0, ext:63732333358, loc:(*time.Location)(0x9e74040)}}, Reason:"ReplicaSetUpdated", Message:"ReplicaSet \"dp-7f9df745ff\" is progressing."}}, CollisionCount:(*int32)(nil)}

Available=False, Progressing=False is the Deployment controller saying "I cannot deliver my expected service level for this Deployment", so that's when we should be complaining.

Fixes noise like:

  Aug 6 18:03:00.500: INFO: cluster upgrade is Failing: Multiple errors are preventing progress:
  * Could not update namespace "openshift-service-ca-operator" (467 of 608)
  * deployment openshift-cluster-machine-approver/machine-approver is not available MinimumReplicasUnavailable: Deployment does not have minimum availability.
  * deployment openshift-ingress-operator/ingress-operator is not available MinimumReplicasUnavailable: Deployment does not have minimum availability.

(the namespace part of that message is a separate issue).

[1]: https://prow.ci.openshift.org/view/gcs/origin-ci-test/logs/release-openshift-origin-installer-e2e-gcp-upgrade-4.6/1291426211527921664
1 parent 16d3d84 commit 0442094

File tree

1 file changed

+3
-12
lines changed

1 file changed

+3
-12
lines changed

lib/resourcebuilder/apps.go

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -114,20 +114,11 @@ func (b *builder) checkDeploymentHealth(ctx context.Context, deployment *appsv1.
114114
}
115115
}
116116

117-
if availableCondition != nil && availableCondition.Status == corev1.ConditionFalse {
117+
if availableCondition != nil && availableCondition.Status == corev1.ConditionFalse && progressingCondition != nil && progressingCondition.Status == corev1.ConditionFalse {
118118
return &payload.UpdateError{
119-
Nested: fmt.Errorf("deployment %s is not available; updated replicas=%d of %d, available replicas=%d of %d", iden, d.Status.UpdatedReplicas, d.Status.Replicas, d.Status.AvailableReplicas, d.Status.Replicas),
119+
Nested: fmt.Errorf("deployment %s is not available and not progressing; updated replicas=%d of %d, available replicas=%d of %d", iden, d.Status.UpdatedReplicas, d.Status.Replicas, d.Status.AvailableReplicas, d.Status.Replicas),
120120
Reason: "WorkloadNotAvailable",
121-
Message: fmt.Sprintf("deployment %s is not available %s: %s", iden, availableCondition.Reason, availableCondition.Message),
122-
Name: iden,
123-
}
124-
}
125-
126-
if progressingCondition != nil && progressingCondition.Status == corev1.ConditionFalse {
127-
return &payload.UpdateError{
128-
Nested: fmt.Errorf("deployment %s is not progressing; updated replicas=%d of %d, available replicas=%d of %d", iden, d.Status.UpdatedReplicas, d.Status.Replicas, d.Status.AvailableReplicas, d.Status.Replicas),
129-
Reason: "WorkloadNotAvailable",
130-
Message: fmt.Sprintf("deployment %s is not progressing %s: %s", iden, progressingCondition.Reason, progressingCondition.Message),
121+
Message: fmt.Sprintf("deployment %s is not available %s (%s) or progressing %s (%s)", iden, availableCondition.Reason, availableCondition.Message, progressingCondition.Reason, progressingCondition.Message),
131122
Name: iden,
132123
}
133124
}

0 commit comments

Comments
 (0)