Skip to content
This repository was archived by the owner on Jan 13, 2023. It is now read-only.

Commit 60ca76d

Browse files
authored
Merge pull request #47 from redskyops/change/readiness-error
Add a ReadinessError
2 parents e06242d + 7222c70 commit 60ca76d

File tree

3 files changed

+133
-77
lines changed

3 files changed

+133
-77
lines changed

controllers/ready_controller.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package controllers
1818

1919
import (
2020
"context"
21-
"fmt"
2221
"time"
2322

2423
"github.com/go-logr/logr"
@@ -172,17 +171,18 @@ func (r *ReadyReconciler) checkReadiness(ctx context.Context, t *redskyv1alpha1.
172171
// Get the objects to check
173172
ul, err := r.getCheckTargets(ctx, c)
174173
if err != nil {
175-
trial.ApplyCondition(&t.Status, redskyv1alpha1.TrialFailed, corev1.ConditionTrue, "ReadinessCheckFailed", err.Error(), probeTime)
174+
readinessCheckFailed(t, probeTime, err)
176175
err := r.Update(ctx, t)
177176
return controller.RequeueConflict(err)
178177
}
179178

180179
// Check for readiness
181180
if msg, ok, err := checker.check(ctx, c, ul, probeTime); err != nil {
182-
trial.ApplyCondition(&t.Status, redskyv1alpha1.TrialFailed, corev1.ConditionTrue, "ReadinessCheckFailed", err.Error(), probeTime)
181+
readinessCheckFailed(t, probeTime, err)
183182
err := r.Update(ctx, t)
184183
return controller.RequeueConflict(err)
185184
} else if !ok {
185+
// We may overwrite the message as we iterate over the remaining readiness checks
186186
trial.ApplyCondition(&t.Status, redskyv1alpha1.TrialReady, corev1.ConditionFalse, "Waiting", msg, probeTime)
187187
}
188188
}
@@ -235,6 +235,20 @@ func (r *ReadyReconciler) getCheckTargets(ctx context.Context, rc *redskyv1alpha
235235
return ul, nil
236236
}
237237

238+
// readinessCheckFailed puts a trial into a failed state due to a failed readiness check
239+
func readinessCheckFailed(t *redskyv1alpha1.Trial, probeTime *metav1.Time, err error) {
240+
reason, message := "ReadinessCheckFailed", err.Error()
241+
if rerr, ok := err.(*ready.ReadinessError); ok {
242+
if rerr.Reason != "" {
243+
reason = rerr.Reason
244+
}
245+
if rerr.Message != "" {
246+
message = rerr.Message
247+
}
248+
}
249+
trial.ApplyCondition(&t.Status, redskyv1alpha1.TrialFailed, corev1.ConditionTrue, reason, message, probeTime)
250+
}
251+
238252
// readinessChecker is the loop state used to evaluate readiness checks
239253
type readinessChecker struct {
240254
// checker is used to evaluate the conditions of a target
@@ -304,13 +318,13 @@ func (rc *readinessChecker) check(ctx context.Context, c *redskyv1alpha1.Readine
304318
if ok || err != nil {
305319
c.AttemptsRemaining = 0
306320
c.LastCheckTime = nil
307-
return msg, ok, err
321+
return "", ok, err
308322
}
309323

310324
// Check if we exceeded the failure threshold
311325
c.AttemptsRemaining--
312326
if c.AttemptsRemaining <= 0 {
313-
return "", false, fmt.Errorf("%s", msg)
327+
return "", false, &ready.ReadinessError{Reason: "ReadinessFailureThreshold", Message: msg}
314328
}
315329

316330
// Record the fact that we need to re-check

internal/ready/ready.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,25 @@ type ReadinessChecker struct {
5454
Reader client.Reader
5555
}
5656

57+
// ReadinessError is an error that occurs while testing for readiness, it indicates a "hard failure" and is not just
58+
// an indicator that something is not ready (i.e. it is an unrecoverable state and will never be "ready").
59+
type ReadinessError struct {
60+
// Reason is a code indicating the reason why a readiness check failed
61+
Reason string
62+
// Message is a more detailed message indicating the nature of the failure
63+
Message string
64+
65+
error string
66+
}
67+
68+
// Error returns the message
69+
func (e *ReadinessError) Error() string {
70+
if e.error != "" {
71+
return e.error
72+
}
73+
return "readiness check failed"
74+
}
75+
5776
// CheckConditions checks to see that all of the listed conditions have a status of true on the specified object. Note
5877
// that in addition to generically checking in the `status.conditions` field, special conditions are also supported. The
5978
// special conditions are prefixed with "redskyops.dev/".
@@ -88,8 +107,8 @@ func (r *ReadinessChecker) CheckConditions(ctx context.Context, obj *unstructure
88107
}
89108

90109
// Make sure it's not a hard fail
91-
if m, err := r.podFailed(ctx, obj); err != nil {
92-
return m, false, err
110+
if err := r.podFailed(ctx, obj); err != nil {
111+
return "", false, err
93112
}
94113

95114
// Stop checking as soon as a condition is not "True"
@@ -193,11 +212,11 @@ func (r *ReadinessChecker) podReady(ctx context.Context, obj *unstructured.Unstr
193212
}
194213

195214
// podFailed looks for pods that are obviously in a failed state and are unlikely to recover
196-
func (r *ReadinessChecker) podFailed(ctx context.Context, obj *unstructured.Unstructured) (string, error) {
215+
func (r *ReadinessChecker) podFailed(ctx context.Context, obj *unstructured.Unstructured) error {
197216
// Get the list of pods for the object
198217
list, err := r.listPods(ctx, obj)
199218
if err != nil {
200-
return "", err
219+
return err
201220
}
202221

203222
// Iterate over the pods looking for failures
@@ -206,7 +225,7 @@ func (r *ReadinessChecker) podFailed(ctx context.Context, obj *unstructured.Unst
206225
for _, c := range p.Status.Conditions {
207226
// Check for unschedulable pods
208227
if c.Type == corev1.PodScheduled && c.Status == corev1.ConditionFalse && c.Reason == corev1.PodReasonUnschedulable {
209-
return c.Message, fmt.Errorf("%s", c.Reason)
228+
return &ReadinessError{error: "pod unschedulable", Reason: c.Reason, Message: c.Message}
210229
}
211230

212231
// Check the container status
@@ -218,12 +237,12 @@ func (r *ReadinessChecker) podFailed(ctx context.Context, obj *unstructured.Unst
218237
switch {
219238
case cc.State.Waiting != nil:
220239
if cc.RestartCount > 0 && cc.State.Waiting.Reason == "CrashLoopBackOff" {
221-
return cc.State.Waiting.Message, fmt.Errorf("%s", cc.State.Waiting.Reason)
240+
return &ReadinessError{error: "container crash loop back off", Reason: cc.State.Waiting.Reason, Message: cc.State.Waiting.Message}
222241
}
223242

224243
case cc.State.Terminated != nil:
225244
if p.Spec.RestartPolicy == corev1.RestartPolicyNever && cc.RestartCount == 0 && cc.State.Terminated.Reason == "Error" {
226-
return cc.State.Terminated.Message, fmt.Errorf("%s", cc.State.Terminated.Reason)
245+
return &ReadinessError{error: "container error", Reason: cc.State.Terminated.Reason, Message: cc.State.Terminated.Message}
227246
}
228247
}
229248
}
@@ -232,7 +251,7 @@ func (r *ReadinessChecker) podFailed(ctx context.Context, obj *unstructured.Unst
232251
}
233252

234253
// There are no recognizably failed pods
235-
return "", nil
254+
return nil
236255
}
237256

238257
// listPods returns the pods "owned" by the supplied unstructured object

internal/ready/ready_test.go

Lines changed: 87 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import (
2020
"context"
2121
"testing"
2222

23-
. "github.com/onsi/gomega"
23+
"github.com/stretchr/testify/assert"
2424
appsv1 "k8s.io/api/apps/v1"
2525
corev1 "k8s.io/api/core/v1"
2626
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -31,80 +31,103 @@ import (
3131
)
3232

3333
func TestReadinessChecker_CheckConditions(t *testing.T) {
34-
scheme := runtime.NewScheme()
35-
_ = clientgoscheme.AddToScheme(scheme)
36-
37-
t.Run("always-true", withReadinessChecker(scheme,
38-
func(g *WithT, rc *ReadinessChecker, u *unstructured.Unstructured) {
39-
msg, ok, err := rc.CheckConditions(context.TODO(), u, []string{ConditionTypeAlwaysTrue})
40-
g.Expect(err).ShouldNot(HaveOccurred())
41-
g.Expect(ok).To(BeTrue())
42-
g.Expect(msg).To(BeEmpty())
34+
cases := []struct {
35+
desc string
36+
objs []runtime.Object
37+
conditionTypes []string
38+
msg string
39+
ready bool
40+
err error
41+
}{
42+
{
43+
desc: "always-true",
44+
conditionTypes: []string{ConditionTypeAlwaysTrue},
45+
ready: true,
4346
},
44-
))
47+
{
48+
desc: "pod-ready",
49+
conditionTypes: []string{ConditionTypePodReady},
50+
ready: true,
4551

46-
t.Run("pod-ready", withReadinessChecker(scheme,
47-
func(g *WithT, rc *ReadinessChecker, u *unstructured.Unstructured) {
48-
msg, ok, err := rc.CheckConditions(context.TODO(), u, []string{ConditionTypePodReady})
49-
g.Expect(err).ShouldNot(HaveOccurred())
50-
g.Expect(ok).To(BeTrue())
51-
g.Expect(msg).To(BeEmpty())
52-
},
53-
&appsv1.StatefulSet{
54-
Spec: appsv1.StatefulSetSpec{
55-
Selector: &metav1.LabelSelector{
56-
MatchLabels: map[string]string{"test": "test"},
52+
objs: []runtime.Object{
53+
&appsv1.StatefulSet{
54+
Spec: appsv1.StatefulSetSpec{
55+
Selector: &metav1.LabelSelector{
56+
MatchLabels: map[string]string{"test": "test"},
57+
},
58+
},
59+
},
60+
&corev1.Pod{
61+
ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"test": "test"}},
62+
Status: corev1.PodStatus{
63+
Conditions: []corev1.PodCondition{{
64+
Type: corev1.PodReady,
65+
Status: corev1.ConditionTrue,
66+
}},
67+
},
5768
},
5869
},
5970
},
60-
&corev1.Pod{
61-
ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"test": "test"}},
62-
Status: corev1.PodStatus{
63-
Conditions: []corev1.PodCondition{{
64-
Type: corev1.PodReady,
65-
Status: corev1.ConditionTrue,
66-
}},
71+
{
72+
desc: "unschedulable",
73+
conditionTypes: []string{ConditionTypePodReady},
74+
err: &ReadinessError{
75+
Reason: corev1.PodReasonUnschedulable,
76+
error: "pod unschedulable",
6777
},
68-
},
69-
))
7078

71-
t.Run("unschedulable", withReadinessChecker(scheme,
72-
func(g *WithT, rc *ReadinessChecker, u *unstructured.Unstructured) {
73-
_, _, err := rc.CheckConditions(context.TODO(), u, []string{ConditionTypePodReady})
74-
g.Expect(err).Should(MatchError(corev1.PodReasonUnschedulable))
75-
},
76-
&appsv1.Deployment{
77-
Spec: appsv1.DeploymentSpec{
78-
Selector: &metav1.LabelSelector{
79-
MatchLabels: map[string]string{"test": "test"},
79+
objs: []runtime.Object{
80+
&appsv1.Deployment{
81+
Spec: appsv1.DeploymentSpec{
82+
Selector: &metav1.LabelSelector{
83+
MatchLabels: map[string]string{"test": "test"},
84+
},
85+
},
86+
},
87+
&corev1.Pod{
88+
ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"test": "test"}},
89+
Status: corev1.PodStatus{
90+
Conditions: []corev1.PodCondition{{
91+
Type: corev1.PodScheduled,
92+
Status: corev1.ConditionFalse,
93+
Reason: corev1.PodReasonUnschedulable,
94+
}},
95+
},
8096
},
8197
},
8298
},
83-
&corev1.Pod{
84-
ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"test": "test"}},
85-
Status: corev1.PodStatus{
86-
Conditions: []corev1.PodCondition{{
87-
Type: corev1.PodScheduled,
88-
Status: corev1.ConditionFalse,
89-
Reason: corev1.PodReasonUnschedulable,
90-
}},
91-
},
92-
},
93-
))
94-
}
99+
}
100+
101+
ctx := context.TODO()
102+
scheme := runtime.NewScheme()
103+
_ = clientgoscheme.AddToScheme(scheme)
104+
for _, c := range cases {
105+
t.Run(c.desc, func(t *testing.T) {
106+
// Setup a readiness checker for the test case using the supplied objects
107+
u := &unstructured.Unstructured{}
108+
if len(c.objs) > 0 {
109+
if err := scheme.Convert(c.objs[0], u, nil); err != nil {
110+
t.Fatalf("Could not convert to unstructured: %v", err)
111+
}
112+
c.objs = c.objs[1:]
113+
}
114+
rc := &ReadinessChecker{Reader: fake.NewFakeClientWithScheme(scheme, c.objs...)}
95115

96-
// withReadinessChecker wraps a ReadinessChecker test function; the first object will be converted to an unstructured
97-
// object for use as the target object, the remaining objects will be available through the checker's reader
98-
func withReadinessChecker(scheme *runtime.Scheme, f func(*WithT, *ReadinessChecker, *unstructured.Unstructured), objs ...runtime.Object) func(*testing.T) {
99-
return func(t *testing.T) {
100-
u := &unstructured.Unstructured{}
101-
if len(objs) > 0 {
102-
if err := scheme.Convert(objs[0], u, nil); err != nil {
103-
t.Fatalf("Could not convert to unstructured: %v", err)
116+
// Verify the results
117+
msg, ready, err := rc.CheckConditions(ctx, u, c.conditionTypes)
118+
assert.Equal(t, c.ready, ready)
119+
assert.Equal(t, c.msg, msg)
120+
if c.err != nil {
121+
assert.EqualError(t, err, c.err.Error())
122+
if rerr, ok := c.err.(*ReadinessError); ok {
123+
if assert.IsType(t, err, &ReadinessError{}) {
124+
assert.Equal(t, rerr.Reason, err.(*ReadinessError).Reason)
125+
assert.Equal(t, rerr.Message, err.(*ReadinessError).Message)
126+
}
127+
}
128+
} else {
129+
assert.NoError(t, err)
104130
}
105-
objs = objs[1:]
106-
}
107-
reader := fake.NewFakeClientWithScheme(scheme, objs...)
108-
f(NewWithT(t), &ReadinessChecker{Reader: reader}, u)
131+
})
109132
}
110133
}

0 commit comments

Comments
 (0)