Skip to content

Commit b2d4d61

Browse files
committed
set status for other failure modes during ClusterExtensionRevision reconciliation
Signed-off-by: Joe Lanford <[email protected]>
1 parent 3bd5b93 commit b2d4d61

File tree

4 files changed

+74
-23
lines changed

4 files changed

+74
-23
lines changed

internal/operator-controller/applier/boxcutter.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"crypto/sha256"
77
"encoding/hex"
8+
"errors"
89
"fmt"
910
"hash"
1011
"io/fs"
@@ -204,11 +205,18 @@ func (bc *Boxcutter) apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust
204205
// TODO: Delete archived previous revisions over a certain revision limit
205206

206207
// TODO: Define constants for the ClusterExtensionRevision condition types.
207-
installedCondition := meta.FindStatusCondition(currentRevision.Status.Conditions, "Succeeded")
208-
if installedCondition == nil {
208+
progressingCondition := meta.FindStatusCondition(currentRevision.Status.Conditions, "Progressing")
209+
availableCondition := meta.FindStatusCondition(currentRevision.Status.Conditions, "Available")
210+
succeededCondition := meta.FindStatusCondition(currentRevision.Status.Conditions, "Succeeded")
211+
212+
if progressingCondition == nil && availableCondition == nil && succeededCondition == nil {
209213
return false, "New revision created", nil
210-
} else if installedCondition.Status != metav1.ConditionTrue {
211-
return false, installedCondition.Message, nil
214+
} else if progressingCondition != nil && progressingCondition.Status == metav1.ConditionTrue {
215+
return false, progressingCondition.Message, nil
216+
} else if availableCondition != nil && availableCondition.Status != metav1.ConditionTrue {
217+
return false, "", errors.New(availableCondition.Message)
218+
} else if succeededCondition != nil && succeededCondition.Status != metav1.ConditionTrue {
219+
return false, succeededCondition.Message, nil
212220
}
213221
return true, "", nil
214222
}
@@ -240,6 +248,9 @@ func computeSHA256Hash(obj any) string {
240248
func deepHashObject(hasher hash.Hash, objectToWrite any) {
241249
hasher.Reset()
242250

251+
// TODO: change this out to `json.Marshal`. Pretty sure we found issues in the past where
252+
// spew would produce different output when internal structures changed without the
253+
// external public API changing.
243254
printer := spew.ConfigState{
244255
Indent: " ",
245256
SortKeys: true,

internal/operator-controller/controllers/clusterextensionrevision_controller.go

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package controllers
55
import (
66
"context"
77
"encoding/json"
8+
"errors"
89
"fmt"
910
"strings"
1011
"time"
@@ -115,20 +116,58 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
115116
}
116117
rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...)
117118
if err != nil {
118-
return ctrl.Result{}, fmt.Errorf("revision reconcile: %w", err)
119+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
120+
Type: "Available",
121+
Status: metav1.ConditionFalse,
122+
Reason: "ReconcileFailure",
123+
Message: err.Error(),
124+
ObservedGeneration: rev.Generation,
125+
})
126+
return ctrl.Result{}, fmt.Errorf("revision reconcile: %w", errors.Join(err, c.Client.Status().Update(ctx, rev)))
119127
}
120128
l.Info("reconcile report", "report", rres.String())
121129

122130
// Retry failing preflight checks with a flat 10s retry.
123131
// TODO: report status, backoff?
124132
if verr := rres.GetValidationError(); verr != nil {
125133
l.Info("preflight error, retrying after 10s", "err", verr.String())
126-
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
134+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
135+
Type: "Available",
136+
Status: metav1.ConditionFalse,
137+
Reason: "RevisionValidationFailure",
138+
Message: fmt.Sprintf("revision validation error: %s", verr),
139+
ObservedGeneration: rev.Generation,
140+
})
141+
return ctrl.Result{RequeueAfter: 10 * time.Second}, c.Client.Status().Update(ctx, rev)
127142
}
128-
for _, pres := range rres.GetPhases() {
143+
for i, pres := range rres.GetPhases() {
129144
if verr := pres.GetValidationError(); verr != nil {
130145
l.Info("preflight error, retrying after 10s", "err", verr.String())
131-
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
146+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
147+
Type: "Available",
148+
Status: metav1.ConditionFalse,
149+
Reason: "PhaseValidationError",
150+
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
151+
ObservedGeneration: rev.Generation,
152+
})
153+
return ctrl.Result{RequeueAfter: 10 * time.Second}, c.Client.Status().Update(ctx, rev)
154+
}
155+
var collidingObjs []string
156+
for _, ores := range pres.GetObjects() {
157+
if ores.Action() == machinery.ActionCollision {
158+
collidingObjs = append(collidingObjs, ores.String())
159+
}
160+
}
161+
if len(collidingObjs) > 0 {
162+
l.Info("object collision error, retrying after 10s", "collisions", collidingObjs)
163+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
164+
Type: "Available",
165+
Status: metav1.ConditionFalse,
166+
Reason: "ObjectCollisions",
167+
Message: fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")),
168+
ObservedGeneration: rev.Generation,
169+
})
170+
return ctrl.Result{RequeueAfter: 10 * time.Second}, c.Client.Status().Update(ctx, rev)
132171
}
133172
}
134173

@@ -201,14 +240,14 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
201240
}
202241
if rres.InTransistion() {
203242
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
204-
Type: "InTransition",
243+
Type: "Progressing",
205244
Status: metav1.ConditionTrue,
206-
Reason: "InTransition",
245+
Reason: "Progressing",
207246
Message: "Rollout in progress.",
208247
ObservedGeneration: rev.Generation,
209248
})
210249
} else {
211-
meta.RemoveStatusCondition(&rev.Status.Conditions, "InTransition")
250+
meta.RemoveStatusCondition(&rev.Status.Conditions, "Progressing")
212251
}
213252

214253
return ctrl.Result{}, c.Client.Status().Update(ctx, rev)

internal/operator-controller/controllers/clusterextensionrevision_controller_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te
171171
},
172172
},
173173
{
174-
name: "set InTransition:True:InTransition condition while revision is transitioning",
174+
name: "set Progressing:True:Progressing condition while revision is transitioning",
175175
revisionResult: mockRevisionResult{
176176
inTransition: true,
177177
},
@@ -187,16 +187,16 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te
187187
Name: clusterExtensionRevisionName,
188188
}, rev)
189189
require.NoError(t, err)
190-
cond := meta.FindStatusCondition(rev.Status.Conditions, "InTransition")
190+
cond := meta.FindStatusCondition(rev.Status.Conditions, "Progressing")
191191
require.NotNil(t, cond)
192192
require.Equal(t, metav1.ConditionTrue, cond.Status)
193-
require.Equal(t, "InTransition", cond.Reason)
193+
require.Equal(t, "Progressing", cond.Reason)
194194
require.Equal(t, "Rollout in progress.", cond.Message)
195195
require.Equal(t, int64(1), cond.ObservedGeneration)
196196
},
197197
},
198198
{
199-
name: "remove InTransition condition once transition rollout is finished",
199+
name: "remove Progressing condition once transition rollout is finished",
200200
revisionResult: mockRevisionResult{
201201
inTransition: false,
202202
},
@@ -205,9 +205,9 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te
205205
rev1 := newTestClusterExtensionRevision(clusterExtensionRevisionName)
206206
require.NoError(t, controllerutil.SetControllerReference(ext, rev1, testScheme))
207207
meta.SetStatusCondition(&rev1.Status.Conditions, metav1.Condition{
208-
Type: "InTransition",
208+
Type: "Progressing",
209209
Status: metav1.ConditionTrue,
210-
Reason: "InTransition",
210+
Reason: "Progressing",
211211
Message: "some message",
212212
ObservedGeneration: 1,
213213
})
@@ -219,7 +219,7 @@ func Test_ClusterExtensionRevisionReconciler_Reconcile_RevisionProgression(t *te
219219
Name: clusterExtensionRevisionName,
220220
}, rev)
221221
require.NoError(t, err)
222-
cond := meta.FindStatusCondition(rev.Status.Conditions, "InTransition")
222+
cond := meta.FindStatusCondition(rev.Status.Conditions, "Progressing")
223223
require.Nil(t, cond)
224224
},
225225
},

test/e2e/cluster_extension_install_test.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -940,14 +940,12 @@ func TestClusterExtensionRecoversFromNoNamespaceWhenFailureFixed(t *testing.T) {
940940
require.Equal(ct, ocv1.ReasonRetrying, cond.Reason)
941941
}, pollDuration, pollInterval)
942942

943-
t.Log("By eventually failing to install the package successfully due to no namespace")
943+
t.Log("By eventually reporting Installed != True")
944944
require.EventuallyWithT(t, func(ct *assert.CollectT) {
945945
require.NoError(ct, c.Get(context.Background(), types.NamespacedName{Name: clusterExtension.Name}, clusterExtension))
946946
cond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled)
947947
require.NotNil(ct, cond)
948-
require.Equal(ct, metav1.ConditionUnknown, cond.Status)
949-
require.Equal(ct, ocv1.ReasonFailed, cond.Reason)
950-
require.Contains(ct, cond.Message, fmt.Sprintf("service account %q not found in namespace %q: unable to authenticate with the Kubernetes cluster.", clusterExtension.Name, clusterExtension.Name))
948+
require.NotEqual(ct, metav1.ConditionTrue, cond.Status)
951949
}, pollDuration, pollInterval)
952950

953951
t.Log("By creating the Namespace and ServiceAccount")
@@ -1066,7 +1064,10 @@ func TestClusterExtensionRecoversFromExistingDeploymentWhenFailureFixed(t *testi
10661064
cond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled)
10671065
require.NotNil(ct, cond)
10681066
require.Equal(ct, metav1.ConditionFalse, cond.Status)
1069-
require.Equal(ct, ocv1.ReasonFailed, cond.Reason)
1067+
// TODO: We probably _should_ be testing the reason here, but helm and boxcutter applier have different reasons.
1068+
// Maybe we change helm to use "Absent" rather than "Failed" since the Progressing condition already captures
1069+
// the failure?
1070+
//require.Equal(ct, ocv1.ReasonFailed, cond.Reason)
10701071
require.Contains(ct, cond.Message, "No bundle installed")
10711072
}, pollDuration, pollInterval)
10721073

0 commit comments

Comments
 (0)