Skip to content

Commit ec0cadd

Browse files
feat: gracefully handle lack of connectivity with Konnect API (#3184)
Co-authored-by: Jintao Zhang <zhangjintao9020@gmail.com>
1 parent e71b704 commit ec0cadd

9 files changed

+484
-35
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@
5959
For this reference to be allowed, a `KongReferenceGrant` resource must be created
6060
in the namespace of the `KongService`, allowing access for the `KongRoute`.
6161
[#3125](https://github.com/Kong/kong-operator/pull/3125)
62+
- Gracefully handle network errors when communicating with Konnect API.
63+
When a network error occurs during Konnect API operations, the operator
64+
will patch the resource status conditions to indicate the failure and
65+
requeue the reconciliation for a later retry.
66+
[#3184](https://github.com/Kong/kong-operator/pull/3184)
6267

6368
### Fixes
6469

controller/konnect/konnectextension_controller.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,14 @@ func (r *KonnectExtensionReconciler) Reconcile(ctx context.Context, req ctrl.Req
371371
return res, err
372372
}
373373

374+
if !cleanup {
375+
// ControlPlane not found and we're not in cleanup mode.
376+
// The controlPlaneRefValid condition has already been set to false in getGatewayKonnectControlPlane.
377+
// We've done the necessary cleanup above, now wait for the CP to be created or the reference to be corrected.
378+
log.Debug(logger, "ControlPlane not found, waiting for it to be available")
379+
return res, nil
380+
}
381+
374382
default:
375383
log.Debug(logger, "ControlPlane not ready yet")
376384
return res, nil

controller/konnect/reconciler_generic.go

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"net/url"
78
"time"
89

910
k8serrors "k8s.io/apimachinery/pkg/api/errors"
@@ -450,6 +451,14 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile(
450451

451452
if controllerutil.RemoveFinalizer(ent, KonnectCleanupFinalizer) {
452453
if err := ops.Delete(ctx, sdk, r.Client, r.MetricRecorder, ent); err != nil {
454+
// If the error was a network error, handle it here, there's no need to proceed,
455+
// as no state has changed.
456+
// Status conditions are updated in handleOpsErr.
457+
var errUrl *url.Error
458+
if errors.As(err, &errUrl) {
459+
return r.handleOpsErr(ctx, ent, errUrl)
460+
}
461+
453462
// If the error is a rate limit error, requeue after the retry-after duration
454463
// instead of returning an error.
455464
if retryAfter, isRateLimited := ops.GetRetryAfterFromRateLimitError(err); isRateLimited {
@@ -485,7 +494,16 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile(
485494
// Handle type specific operations and stop reconciliation if needed.
486495
// This can happen for instance when KongConsumer references credentials Secrets
487496
// that do not exist or populate some Status fields based on Konnect API.
488-
if stop, res, err := handleTypeSpecific(ctx, sdk, r.Client, ent); err != nil || !res.IsZero() || stop {
497+
if stop, res, err := handleTypeSpecific(ctx, sdk, r.Client, ent); err != nil {
498+
// If the error was a network error, handle it here, there's no need to proceed,
499+
// as no state has changed.
500+
// Status conditions are updated in handleOpsErr.
501+
var errUrl *url.Error
502+
if errors.As(err, &errUrl) {
503+
return r.handleOpsErr(ctx, ent, errUrl)
504+
}
505+
return ctrl.Result{}, err
506+
} else if !res.IsZero() || stop {
489507
return res, err
490508
}
491509

@@ -530,22 +548,31 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile(
530548
// Regardless of the error, patch the status as it can contain the Konnect ID,
531549
// Org ID, Server URL and status conditions.
532550
// Konnect ID will be needed for the finalizer to work.
533-
if res, err := patch.ApplyStatusPatchIfNotEmpty(ctx, r.Client, logger, any(ent).(client.Object), obj); err != nil {
551+
if _, err := patch.ApplyStatusPatchIfNotEmpty(ctx, r.Client, logger, any(ent).(client.Object), obj); err != nil {
534552
if k8serrors.IsConflict(err) {
535553
return ctrl.Result{Requeue: true}, nil
536554
}
537555
return ctrl.Result{}, fmt.Errorf("failed to update status after creating object: %w", err)
538-
} else if res != op.Noop {
539-
return ctrl.Result{}, nil
540556
}
541557

542558
if err != nil {
559+
var (
560+
errUrl *url.Error
561+
rateLimitErr ops.RateLimitError
562+
)
563+
switch {
564+
// If the error was a network error, handle it here, there's no need to proceed,
565+
// as no state has changed.
566+
// Status conditions are updated in handleOpsErr.
567+
case errors.As(err, &errUrl):
568+
return r.handleOpsErr(ctx, ent, errUrl)
569+
543570
// If the error is a rate limit error, requeue after the retry-after duration
544571
// instead of returning an error.
545-
var rateLimitErr ops.RateLimitError
546-
if errors.As(err, &rateLimitErr) {
572+
case errors.As(err, &rateLimitErr):
547573
return ctrl.Result{RequeueAfter: rateLimitErr.RetryAfter}, nil
548574
}
575+
549576
return ctrl.Result{}, ops.FailedKonnectOpError[T]{
550577
Op: ops.CreateOp,
551578
Err: err,
@@ -557,6 +584,7 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile(
557584
}
558585

559586
res, err = ops.Update(ctx, sdk, r.SyncPeriod, r.Client, r.MetricRecorder, ent)
587+
560588
// Set the server URL and org ID regardless of the error.
561589
setStatusServerURLAndOrgID(ent, server, apiAuth.Status.OrganizationID)
562590
// Update the status of the object regardless of the error.
@@ -567,13 +595,25 @@ func (r *KonnectEntityReconciler[T, TEnt]) Reconcile(
567595
return ctrl.Result{}, fmt.Errorf("failed to update in cluster resource after Konnect update: %w %w", errUpd, err)
568596
}
569597
if err != nil {
598+
logger.Error(err, "failed to update")
599+
600+
var (
601+
errUrl *url.Error
602+
rateLimitErr ops.RateLimitError
603+
)
604+
switch {
605+
// If the error was a network error, handle it here, there's no need to proceed,
606+
// as no state has changed.
607+
// Status conditions are updated in handleOpsErr.
608+
case errors.As(err, &errUrl):
609+
return r.handleOpsErr(ctx, ent, errUrl)
610+
570611
// If the error is a rate limit error, requeue after the retry-after duration
571612
// instead of returning an error.
572-
var rateLimitErr ops.RateLimitError
573-
if errors.As(err, &rateLimitErr) {
613+
case errors.As(err, &rateLimitErr):
574614
return ctrl.Result{RequeueAfter: rateLimitErr.RetryAfter}, nil
575615
}
576-
logger.Error(err, "failed to update")
616+
577617
} else if !res.IsZero() {
578618
return res, nil
579619
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package konnect
2+
3+
import (
4+
"context"
5+
"net/url"
6+
7+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
8+
ctrl "sigs.k8s.io/controller-runtime"
9+
10+
konnectv1alpha1 "github.com/kong/kong-operator/api/konnect/v1alpha1"
11+
"github.com/kong/kong-operator/controller/consts"
12+
"github.com/kong/kong-operator/controller/pkg/patch"
13+
)
14+
15+
// handleOpsErr handles network errors.
16+
// If the error is a network error, it patches the status condition
17+
// of the Konnect entity to reflect the failure to communicate with the Konnect API.
18+
func (r *KonnectEntityReconciler[T, TEnt]) handleOpsErr(
19+
ctx context.Context, ent TEnt, errUrl *url.Error,
20+
) (ctrl.Result, error) {
21+
if errUrl == nil {
22+
return ctrl.Result{}, nil
23+
}
24+
25+
if res, err := patch.StatusWithCondition(ctx, r.Client, ent,
26+
konnectv1alpha1.KonnectEntityProgrammedConditionType,
27+
metav1.ConditionFalse,
28+
konnectv1alpha1.KonnectEntityProgrammedReasonKonnectAPIOpFailed,
29+
errUrl.Error(),
30+
); err != nil || !res.IsZero() {
31+
return res, err
32+
}
33+
34+
// After patching the condition, requeue without backoff to retry the network operation.
35+
return ctrl.Result{RequeueAfter: consts.RequeueWithoutBackoff}, nil
36+
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package konnect
2+
3+
import (
4+
"context"
5+
"errors"
6+
"net/url"
7+
"testing"
8+
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
k8serrors "k8s.io/apimachinery/pkg/api/errors"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
ctrl "sigs.k8s.io/controller-runtime"
14+
"sigs.k8s.io/controller-runtime/pkg/client"
15+
"sigs.k8s.io/controller-runtime/pkg/client/fake"
16+
"sigs.k8s.io/controller-runtime/pkg/client/interceptor"
17+
18+
configurationv1alpha1 "github.com/kong/kong-operator/api/configuration/v1alpha1"
19+
konnectv1alpha1 "github.com/kong/kong-operator/api/konnect/v1alpha1"
20+
"github.com/kong/kong-operator/controller/consts"
21+
"github.com/kong/kong-operator/modules/manager/scheme"
22+
k8sutils "github.com/kong/kong-operator/pkg/utils/kubernetes"
23+
)
24+
25+
func TestHandleOpsErr(t *testing.T) {
26+
tests := []struct {
27+
name string
28+
inputErr *url.Error
29+
expectedResult ctrl.Result
30+
expectedErr bool
31+
expectedErrMsg string
32+
expectConditionPatched bool
33+
interceptorFunc interceptor.Funcs
34+
}{
35+
{
36+
name: "nil error returns empty result",
37+
inputErr: nil,
38+
expectedResult: ctrl.Result{},
39+
expectedErr: false,
40+
},
41+
{
42+
name: "url.Error patches status condition and returns nil error",
43+
inputErr: &url.Error{
44+
Op: "Get",
45+
URL: "https://api.konghq.com",
46+
Err: errors.New("connection refused"),
47+
},
48+
expectedResult: ctrl.Result{RequeueAfter: consts.RequeueWithoutBackoff},
49+
expectedErr: false,
50+
expectConditionPatched: true,
51+
},
52+
{
53+
name: "url.Error with patch conflict returns requeue",
54+
inputErr: &url.Error{
55+
Op: "Post",
56+
URL: "https://api.konghq.com",
57+
Err: errors.New("timeout"),
58+
},
59+
expectedResult: ctrl.Result{Requeue: true},
60+
expectedErr: false,
61+
interceptorFunc: interceptor.Funcs{
62+
SubResourcePatch: func(
63+
ctx context.Context,
64+
client client.Client,
65+
subResourceName string,
66+
obj client.Object,
67+
patch client.Patch,
68+
opts ...client.SubResourcePatchOption,
69+
) error {
70+
return &k8serrors.StatusError{
71+
ErrStatus: metav1.Status{
72+
Status: metav1.StatusFailure,
73+
Reason: metav1.StatusReasonConflict,
74+
},
75+
}
76+
},
77+
},
78+
},
79+
{
80+
name: "wrapped url.Error is handled correctly",
81+
inputErr: &url.Error{
82+
Op: "Get",
83+
URL: "https://api.konghq.com",
84+
Err: errors.New("no such host"),
85+
},
86+
expectedResult: ctrl.Result{RequeueAfter: consts.RequeueWithoutBackoff},
87+
expectedErr: false,
88+
expectConditionPatched: true,
89+
},
90+
}
91+
92+
for _, tt := range tests {
93+
t.Run(tt.name, func(t *testing.T) {
94+
ctx := context.Background()
95+
96+
ent := &configurationv1alpha1.KongService{
97+
ObjectMeta: metav1.ObjectMeta{
98+
Name: "test-service",
99+
Namespace: "default",
100+
Generation: 1,
101+
},
102+
}
103+
104+
clientBuilder := fake.NewClientBuilder().
105+
WithObjects(ent).
106+
WithStatusSubresource(ent).
107+
WithScheme(scheme.Get())
108+
109+
if tt.interceptorFunc.SubResourcePatch != nil {
110+
clientBuilder = clientBuilder.WithInterceptorFuncs(tt.interceptorFunc)
111+
}
112+
113+
cl := clientBuilder.Build()
114+
115+
r := &KonnectEntityReconciler[
116+
configurationv1alpha1.KongService, *configurationv1alpha1.KongService,
117+
]{
118+
Client: cl,
119+
}
120+
121+
result, err := r.handleOpsErr(ctx, ent, tt.inputErr)
122+
123+
assert.Equal(t, tt.expectedResult, result)
124+
125+
if tt.expectedErr {
126+
require.Error(t, err)
127+
if tt.expectedErrMsg != "" {
128+
assert.Contains(t, err.Error(), tt.expectedErrMsg)
129+
}
130+
return
131+
}
132+
133+
require.NoError(t, err)
134+
135+
if tt.expectConditionPatched {
136+
cond, ok := k8sutils.GetCondition(
137+
konnectv1alpha1.KonnectEntityProgrammedConditionType, ent,
138+
)
139+
require.True(t, ok, "expected condition to be set")
140+
assert.Equal(t, metav1.ConditionFalse, cond.Status)
141+
assert.Equal(t,
142+
string(konnectv1alpha1.KonnectEntityProgrammedReasonKonnectAPIOpFailed),
143+
cond.Reason,
144+
)
145+
}
146+
})
147+
}
148+
}

controller/pkg/patch/patch.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ func ApplyStatusPatchIfNotEmpty[
6666
) (res op.Result, err error) {
6767
// Check if the patch to be applied is empty.
6868
patch := client.MergeFrom(oldExisting)
69+
70+
// NOTE: Code below is not 100% correct as it generates a diff for whole object,
71+
// not only for status subresource. However, controller-runtime does not provide
72+
// an easy way to do that, so for now we rely on the fact that only status is
73+
// being modified before calling this function.
74+
// In future we might want to implement a custom patcher that would only
75+
// consider status field.
6976
b, err := patch.Data(existing)
7077
if err != nil {
7178
return op.Noop, fmt.Errorf("failed to generate patch for %T %s: %w",

0 commit comments

Comments
 (0)