From 25fd4e101c33dc6f7c0c387358cac330da471e73 Mon Sep 17 00:00:00 2001 From: leonsteinhaeuser Date: Mon, 3 Jun 2024 22:14:40 +0200 Subject: [PATCH 1/5] feat: added failure count status field --- api/v1/passboltsecret_types.go | 5 +++++ .../crd/bases/passbolt.tagesspiegel.de_passboltsecrets.yaml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/api/v1/passboltsecret_types.go b/api/v1/passboltsecret_types.go index de63b5e5..54c26030 100644 --- a/api/v1/passboltsecret_types.go +++ b/api/v1/passboltsecret_types.go @@ -112,6 +112,11 @@ type PassboltSecretStatus struct { LastSync metav1.Time `json:"lastSync"` // SyncErrors is a list of errors that occurred during the last sync. SyncErrors []SyncError `json:"syncErrors,omitempty"` + // FailureCount is the number of times the secret failed to sync. + // This is used to determine if the secret should be retried. + // +kubebuilder:validation:Optional + // +kubebuilder:default=0 + FailureCount int `json:"failureCount,omitempty"` } //+kubebuilder:object:root=true diff --git a/config/crd/bases/passbolt.tagesspiegel.de_passboltsecrets.yaml b/config/crd/bases/passbolt.tagesspiegel.de_passboltsecrets.yaml index ee80ece8..fe5a88cc 100644 --- a/config/crd/bases/passbolt.tagesspiegel.de_passboltsecrets.yaml +++ b/config/crd/bases/passbolt.tagesspiegel.de_passboltsecrets.yaml @@ -96,6 +96,11 @@ spec: status: description: PassboltSecretStatus defines the observed state of PassboltSecret properties: + failureCount: + default: 0 + description: FailureCount is the number of times the secret failed + to sync. This is used to determine if the secret should be retried. + type: integer lastSync: description: LastSync is the last time the secret was synced from passbolt. 
From 88c9e699ca28d3cb98cd029dd6bf1afafb6eb077 Mon Sep 17 00:00:00 2001 From: leonsteinhaeuser Date: Mon, 3 Jun 2024 22:15:06 +0200 Subject: [PATCH 2/5] feat: implemented secret sync backoff handling --- internal/controller/passboltsecret_controller.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/internal/controller/passboltsecret_controller.go b/internal/controller/passboltsecret_controller.go index c2e7ac90..083d1241 100644 --- a/internal/controller/passboltsecret_controller.go +++ b/internal/controller/passboltsecret_controller.go @@ -83,6 +83,13 @@ func (r *PassboltSecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque } return errResult, err } + + if secret.Status.FailureCount >= 3 { + // if the secret failed to sync more than 3 times, we stop trying + logr.Info("secret failed to sync more than 3 times. stopping sync", "name", secret.GetName(), "namespace", secret.GetNamespace()) + return ctrl.Result{}, nil + } + // cleanup status secret.Status.SyncErrors = []passboltv1.SyncError{} @@ -94,6 +101,7 @@ func (r *PassboltSecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque if secret.Spec.SecretType != corev1.SecretTypeOpaque && secret.Spec.SecretType != corev1.SecretTypeDockerConfigJson { logr.Info("unsupported secret type", "type", secret.Spec.SecretType) secret.Status.SyncStatus = passboltv1.SyncStatusError + secret.Status.FailureCount++ secret.Status.SyncErrors = append(secret.Status.SyncErrors, passboltv1.SyncError{ Message: fmt.Sprintf("unsupported secret type %q", secret.Spec.SecretType), Time: metav1.Now(), @@ -120,6 +128,7 @@ func (r *PassboltSecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque if err != nil { if snErr, ok := err.(passboltv1.SyncError); ok { secret.Status.SyncStatus = passboltv1.SyncStatusError + secret.Status.FailureCount++ secret.Status.SyncErrors = append(secret.Status.SyncErrors, snErr) if err := r.Client.Status().Update(ctx, secret); err != nil { return errResult, err From 
9b989723837c59c352ffdb4fc4791945694896c0 Mon Sep 17 00:00:00 2001 From: leonsteinhaeuser Date: Mon, 10 Jun 2024 10:30:04 +0200 Subject: [PATCH 3/5] feat: implemented exponential failure backoff --- internal/controller/passboltsecret_controller.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/controller/passboltsecret_controller.go b/internal/controller/passboltsecret_controller.go index 083d1241..aac4ea1a 100644 --- a/internal/controller/passboltsecret_controller.go +++ b/internal/controller/passboltsecret_controller.go @@ -89,6 +89,7 @@ func (r *PassboltSecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque logr.Info("secret failed to sync more than 3 times. stopping sync", "name", secret.GetName(), "namespace", secret.GetNamespace()) return ctrl.Result{}, nil } + errResult.RequeueAfter = time.Duration(1<<secret.Status.FailureCount) * (5 * time.Second) // cleanup status secret.Status.SyncErrors = []passboltv1.SyncError{} From aca35e963f6e73710f47e1814c92df4bbbd95195 Mon Sep 17 00:00:00 2001 From: leonsteinhaeuser Date: Tue, 11 Jun 2024 09:43:04 +0200 Subject: [PATCH 4/5] fix: backoff retry behavior --- internal/controller/passboltsecret_controller.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/internal/controller/passboltsecret_controller.go b/internal/controller/passboltsecret_controller.go index aac4ea1a..71ad6fc1 100644 --- a/internal/controller/passboltsecret_controller.go +++ b/internal/controller/passboltsecret_controller.go @@ -84,11 +84,7 @@ func (r *PassboltSecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque return errResult, err } - if secret.Status.FailureCount >= 3 { - // if the secret failed to sync more than 3 times, we stop trying - logr.Info("secret failed to sync more than 3 times. 
stopping sync", "name", secret.GetName(), "namespace", secret.GetNamespace()) - return ctrl.Result{}, nil - } + // exponential backoff: the requeue delay doubles with every recorded failure (5s, 10s, 20s, ...) errResult.RequeueAfter = time.Duration(1<<secret.Status.FailureCount) * (5 * time.Second) // cleanup status @@ -127,6 +123,11 @@ func (r *PassboltSecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque opRslt, err := controllerutil.CreateOrUpdate(ctx, r.Client, k8sSecret, util.UpdateSecret(ctx, r.PassboltClient, r.Scheme, secret, k8sSecret)) if err != nil { + if secret.Status.FailureCount >= 3 { + // if the secret failed to sync more than 3 times, we stop trying + logr.Info("secret failed to sync more than 3 times. stopping sync", "name", secret.GetName(), "namespace", secret.GetNamespace()) + return ctrl.Result{}, nil + } if snErr, ok := err.(passboltv1.SyncError); ok { secret.Status.SyncStatus = passboltv1.SyncStatusError secret.Status.FailureCount++ @@ -138,6 +139,8 @@ func (r *PassboltSecretReconciler) Reconcile(ctx context.Context, req ctrl.Reque } return errResult, err } + // reset failure counter to 0 + secret.Status.FailureCount = 0 // if the secret was not changed and the status is already success, we can skip the update if opRslt == controllerutil.OperationResultNone && secret.Status.SyncStatus == passboltv1.SyncStatusSuccess { From d8d334d6e45e07ca181a9e869c5f2ce7bc49e1a2 Mon Sep 17 00:00:00 2001 From: leonsteinhaeuser Date: Wed, 26 Jun 2024 14:06:23 +0200 Subject: [PATCH 5/5] feat: added backoff failure test --- e2e/lib.sh | 22 ++++++++++++++++++++++ e2e/run.sh | 21 +++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/e2e/lib.sh b/e2e/lib.sh index edd94651..e1a2766b 100644 --- a/e2e/lib.sh +++ b/e2e/lib.sh @@ -143,3 +143,25 @@ EOF )" sleep 5 } + +# createPassboltSecretV1 +function createPassboltSecretV1WithSecretNotFound() { + createPassboltSecret "$(cat <