Skip to content

Commit 9970fab

Browse files
authored
Cherry-Pick Retry commit in master (#452)
* fix: retry on unauthorized error when retrieving resources by gvk (#449) * fix: retry on unauthorized when retrieving resources by gvk Signed-off-by: Leonardo Luz Almeida <[email protected]> * add test case to validate retry is just invoked if error is Unauthorized Signed-off-by: Leonardo Luz Almeida <[email protected]> Signed-off-by: Leonardo Luz Almeida <[email protected]> * Fix merge conflict Signed-off-by: Leonardo Luz Almeida <[email protected]> * Fix lint Signed-off-by: Leonardo Luz Almeida <[email protected]> * Fix lint Signed-off-by: Leonardo Luz Almeida <[email protected]> Signed-off-by: Leonardo Luz Almeida <[email protected]>
1 parent a56a803 commit 9970fab

File tree

2 files changed

+214
-1
lines changed

2 files changed

+214
-1
lines changed

pkg/sync/sync_context.go

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ import (
1717
apierr "k8s.io/apimachinery/pkg/api/errors"
1818
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1919
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
20+
"k8s.io/apimachinery/pkg/runtime/schema"
2021
"k8s.io/apimachinery/pkg/util/wait"
2122
"k8s.io/client-go/discovery"
2223
"k8s.io/client-go/dynamic"
2324
"k8s.io/client-go/rest"
25+
"k8s.io/client-go/util/retry"
2426
"k8s.io/klog/v2/klogr"
2527
cmdutil "k8s.io/kubectl/pkg/cmd/util"
2628
"k8s.io/kubectl/pkg/util/openapi"
@@ -697,9 +699,31 @@ func (sc *syncContext) getSyncTasks() (_ syncTasks, successful bool) {
697699
task.liveObj = sc.liveObj(task.targetObj)
698700
}
699701

702+
isRetryable := func(err error) bool {
703+
return apierr.IsUnauthorized(err)
704+
}
705+
706+
serverResCache := make(map[schema.GroupVersionKind]*metav1.APIResource)
707+
700708
// check permissions
701709
for _, task := range tasks {
702-
serverRes, err := kube.ServerResourceForGroupVersionKind(sc.disco, task.groupVersionKind(), "get")
710+
711+
var serverRes *metav1.APIResource
712+
var err error
713+
714+
if val, ok := serverResCache[task.groupVersionKind()]; ok {
715+
serverRes = val
716+
err = nil
717+
} else {
718+
err = retry.OnError(retry.DefaultRetry, isRetryable, func() error {
719+
serverRes, err = kube.ServerResourceForGroupVersionKind(sc.disco, task.groupVersionKind(), "get")
720+
return err
721+
})
722+
if serverRes != nil {
723+
serverResCache[task.groupVersionKind()] = serverRes
724+
}
725+
}
726+
703727
if err != nil {
704728
// Special case for custom resources: if CRD is not yet known by the K8s API server,
705729
// and the CRD is part of this sync or the resource is annotated with SkipDryRunOnMissingResource=true,

pkg/sync/sync_context_test.go

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,19 @@ import (
77
"fmt"
88
apierrors "k8s.io/apimachinery/pkg/api/errors"
99
"k8s.io/apimachinery/pkg/runtime/schema"
10+
"net/http"
11+
"net/http/httptest"
1012
"reflect"
1113
"testing"
1214

1315
"github.com/stretchr/testify/assert"
16+
"github.com/stretchr/testify/require"
1417
corev1 "k8s.io/api/core/v1"
1518
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1619
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1720
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
1821
"k8s.io/apimachinery/pkg/runtime"
22+
"k8s.io/client-go/discovery"
1923
fakedisco "k8s.io/client-go/discovery/fake"
2024
"k8s.io/client-go/dynamic/fake"
2125
"k8s.io/client-go/rest"
@@ -24,6 +28,7 @@ import (
2428

2529
"github.com/argoproj/gitops-engine/pkg/diff"
2630
"github.com/argoproj/gitops-engine/pkg/health"
31+
"github.com/argoproj/gitops-engine/pkg/sync/common"
2732
synccommon "github.com/argoproj/gitops-engine/pkg/sync/common"
2833
"github.com/argoproj/gitops-engine/pkg/utils/kube"
2934
"github.com/argoproj/gitops-engine/pkg/utils/kube/kubetest"
@@ -463,6 +468,190 @@ func TestSyncPruneFailure(t *testing.T) {
463468
assert.Equal(t, "foo", result.Message)
464469
}
465470

471+
type APIServerMock struct {
472+
calls int
473+
errorStatus int
474+
errorBody []byte
475+
}
476+
477+
func (s *APIServerMock) newHttpServer(t *testing.T, apiFailuresCount int) *httptest.Server {
478+
t.Helper()
479+
stable := metav1.APIResourceList{
480+
GroupVersion: "v1",
481+
APIResources: []metav1.APIResource{
482+
{Name: "pods", Namespaced: true, Kind: "Pod"},
483+
{Name: "services", Namespaced: true, Kind: "Service", Verbs: v1.Verbs{"get"}},
484+
{Name: "namespaces", Namespaced: false, Kind: "Namespace"},
485+
},
486+
}
487+
488+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
489+
s.calls++
490+
if s.calls <= apiFailuresCount {
491+
w.Header().Set("Content-Type", "application/json")
492+
w.WriteHeader(s.errorStatus)
493+
w.Write(s.errorBody) // nolint:errcheck
494+
return
495+
}
496+
var list interface{}
497+
switch req.URL.Path {
498+
case "/api/v1":
499+
list = &stable
500+
case "/apis/v1":
501+
list = &stable
502+
default:
503+
t.Logf("unexpected request: %s", req.URL.Path)
504+
w.WriteHeader(http.StatusNotFound)
505+
return
506+
}
507+
508+
output, err := json.Marshal(list)
509+
if err != nil {
510+
t.Errorf("unexpected encoding error: %v", err)
511+
return
512+
}
513+
w.Header().Set("Content-Type", "application/json")
514+
w.WriteHeader(http.StatusOK)
515+
w.Write(output) // nolint:errcheck
516+
}))
517+
return server
518+
}
519+
520+
func TestServerResourcesRetry(t *testing.T) {
521+
type fixture struct {
522+
apiServerMock *APIServerMock
523+
httpServer *httptest.Server
524+
syncCtx *syncContext
525+
}
526+
setup := func(t *testing.T, apiFailuresCount int) *fixture {
527+
t.Helper()
528+
syncCtx := newTestSyncCtx(nil, WithOperationSettings(false, false, false, true))
529+
530+
unauthorizedStatus := &metav1.Status{
531+
Status: metav1.StatusFailure,
532+
Code: http.StatusUnauthorized,
533+
Reason: metav1.StatusReasonUnauthorized,
534+
Message: "some error",
535+
}
536+
unauthorizedJSON, err := json.Marshal(unauthorizedStatus)
537+
if err != nil {
538+
t.Errorf("unexpected encoding error while marshaling unauthorizedStatus: %v", err)
539+
return nil
540+
}
541+
server := &APIServerMock{
542+
errorStatus: http.StatusUnauthorized,
543+
errorBody: unauthorizedJSON,
544+
}
545+
httpServer := server.newHttpServer(t, apiFailuresCount)
546+
547+
syncCtx.disco = discovery.NewDiscoveryClientForConfigOrDie(&rest.Config{Host: httpServer.URL})
548+
testSvc := NewService()
549+
testSvc.SetName("test-service")
550+
testSvc.SetNamespace(FakeArgoCDNamespace)
551+
syncCtx.resources = groupResources(ReconciliationResult{
552+
Live: []*unstructured.Unstructured{testSvc, testSvc, testSvc, testSvc},
553+
Target: []*unstructured.Unstructured{testSvc, testSvc, testSvc, testSvc},
554+
})
555+
return &fixture{
556+
apiServerMock: server,
557+
httpServer: httpServer,
558+
syncCtx: syncCtx,
559+
}
560+
561+
}
562+
type testCase struct {
563+
desc string
564+
apiFailureCount int
565+
apiErrorHTTPStatus int
566+
expectedAPICalls int
567+
expectedResources int
568+
expectedPhase synccommon.OperationPhase
569+
expectedMessage string
570+
}
571+
testCases := []testCase{
572+
{
573+
desc: "will return success when no api failure",
574+
apiFailureCount: 0,
575+
expectedAPICalls: 1,
576+
expectedResources: 1,
577+
expectedPhase: common.OperationSucceeded,
578+
expectedMessage: "success",
579+
},
580+
{
581+
desc: "will return success after 1 api failure attempt",
582+
apiFailureCount: 1,
583+
expectedAPICalls: 2,
584+
expectedResources: 1,
585+
expectedPhase: common.OperationSucceeded,
586+
expectedMessage: "success",
587+
},
588+
{
589+
desc: "will return success after 2 api failure attempt",
590+
apiFailureCount: 2,
591+
expectedAPICalls: 3,
592+
expectedResources: 1,
593+
expectedPhase: common.OperationSucceeded,
594+
expectedMessage: "success",
595+
},
596+
{
597+
desc: "will return success after 3 api failure attempt",
598+
apiFailureCount: 3,
599+
expectedAPICalls: 4,
600+
expectedResources: 1,
601+
expectedPhase: common.OperationSucceeded,
602+
expectedMessage: "success",
603+
},
604+
{
605+
desc: "will return success after 4 api failure attempt",
606+
apiFailureCount: 4,
607+
expectedAPICalls: 5,
608+
expectedResources: 1,
609+
expectedPhase: common.OperationSucceeded,
610+
expectedMessage: "success",
611+
},
612+
{
613+
desc: "will fail after 5 api failure attempt",
614+
apiFailureCount: 5,
615+
expectedAPICalls: 5,
616+
expectedResources: 1,
617+
expectedPhase: common.OperationFailed,
618+
expectedMessage: "not valid",
619+
},
620+
{
621+
desc: "will not retry if returned error is different than Unauthorized",
622+
apiErrorHTTPStatus: http.StatusConflict,
623+
apiFailureCount: 1,
624+
expectedAPICalls: 1,
625+
expectedResources: 1,
626+
expectedPhase: common.OperationFailed,
627+
expectedMessage: "not valid",
628+
},
629+
}
630+
for _, tc := range testCases {
631+
tc := tc
632+
t.Run(tc.desc, func(t *testing.T) {
633+
// Given
634+
t.Parallel()
635+
fixture := setup(t, tc.apiFailureCount)
636+
defer fixture.httpServer.Close()
637+
if tc.apiErrorHTTPStatus != 0 {
638+
fixture.apiServerMock.errorStatus = tc.apiErrorHTTPStatus
639+
}
640+
641+
// When
642+
fixture.syncCtx.Sync()
643+
phase, msg, resources := fixture.syncCtx.GetState()
644+
645+
// Then
646+
assert.Equal(t, tc.expectedAPICalls, fixture.apiServerMock.calls, "api calls mismatch")
647+
assert.Len(t, resources, tc.expectedResources, "resources len mismatch")
648+
assert.Contains(t, msg, tc.expectedMessage, "expected message mismatch")
649+
require.Equal(t, tc.expectedPhase, phase, "expected phase mismatch")
650+
require.Len(t, fixture.syncCtx.syncRes, 1, "sync result len mismatch")
651+
})
652+
}
653+
}
654+
466655
func TestDoNotSyncOrPruneHooks(t *testing.T) {
467656
syncCtx := newTestSyncCtx(nil, WithOperationSettings(false, false, false, true))
468657
targetPod := NewPod()

0 commit comments

Comments
 (0)