
Commit b9941ac

Merge pull request #46 from 7ing/fix-retry-logic
Add timeout to renewal issuance logic
2 parents ca2cf99 + cc20317 commit b9941ac

File tree

2 files changed: +73 -1 lines changed


manager/manager.go

Lines changed: 7 additions & 1 deletion
@@ -189,6 +189,7 @@ func NewManager(opts Options) (*Manager, error) {
         maxRequestsPerVolume: opts.MaxRequestsPerVolume,
         nodeNameHash:         nodeNameHash,
         backoffConfig:        *opts.RenewalBackoffConfig,
+        issueRenewalTimeout:  time.Second * 60, // issueRenewalTimeout set to align with NodePublishVolume timeout value
         requestNameGenerator: func() string {
             return string(uuid.NewUUID())
         },
@@ -291,6 +292,9 @@ type Manager struct {
     // backoffConfig configures the exponential backoff applied to certificate renewal failures.
     backoffConfig wait.Backoff

+    // issueRenewalTimeout defines timeout value for each issue() call in renewal process
+    issueRenewalTimeout time.Duration
+
     // requestNameGenerator generates a new random name for a certificaterequest object
     // Defaults to uuid.NewUUID() from k8s.io/apimachinery/pkg/util/uuid.
     requestNameGenerator func() string
@@ -634,7 +638,9 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
             // we'll immediately stop waiting and 'continue' which will then hit the `case <-stopCh` case in the `select`.
             if err := wait.ExponentialBackoffWithContext(ctx, m.backoffConfig, func(ctx context.Context) (bool, error) {
                 log.Info("Triggering new issuance")
-                if err := m.issue(ctx, volumeID); err != nil {
+                issueCtx, issueCancel := context.WithTimeout(ctx, m.issueRenewalTimeout)
+                defer issueCancel()
+                if err := m.issue(issueCtx, volumeID); err != nil {
                     log.Error(err, "Failed to issue certificate, retrying after applying exponential backoff")
                     return false, nil
                 }
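
The change above bounds each renewal attempt with its own deadline while leaving the outer backoff loop in control of retries. For readers who want to see the pattern in isolation, here is a minimal, self-contained sketch of how a per-attempt context.WithTimeout composes with wait.ExponentialBackoffWithContext; issueOnce and the concrete durations are hypothetical stand-ins, not the manager's actual code.

package main

import (
    "context"
    "fmt"
    "time"

    "k8s.io/apimachinery/pkg/util/wait"
)

// issueOnce stands in for the real certificate issuance call; it simply
// honours cancellation of the per-attempt context.
func issueOnce(ctx context.Context) error {
    select {
    case <-time.After(50 * time.Millisecond): // pretend issuance takes 50ms
        return nil
    case <-ctx.Done():
        return fmt.Errorf("issuance attempt aborted: %w", ctx.Err())
    }
}

func main() {
    backoff := wait.Backoff{
        Duration: 100 * time.Millisecond, // initial wait between attempts
        Factor:   2.0,                    // double the wait after each failure
        Jitter:   0.0,
        Steps:    5,
        Cap:      500 * time.Millisecond, // upper bound on the wait
    }
    issueRenewalTimeout := 60 * time.Second // per-attempt budget, as in the commit

    err := wait.ExponentialBackoffWithContext(context.Background(), backoff,
        func(ctx context.Context) (bool, error) {
            // Bound this attempt independently of the outer context; the deferred
            // cancel runs when this closure returns, i.e. once per attempt.
            issueCtx, issueCancel := context.WithTimeout(ctx, issueRenewalTimeout)
            defer issueCancel()
            if err := issueOnce(issueCtx); err != nil {
                return false, nil // report failure, retry after the next backoff interval
            }
            return true, nil // success, stop retrying
        })
    fmt.Println("backoff loop finished with:", err)
}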

manager/manager_test.go

Lines changed: 66 additions & 0 deletions
@@ -8,6 +8,7 @@ import (
     "encoding/pem"
     "fmt"
     "math/big"
+    "sync/atomic"
     "testing"
     "time"

@@ -285,6 +286,71 @@ func TestManager_ManageVolume_beginsManagingAndProceedsIfNotReady(t *testing.T)
     }
 }

+func TestManager_ManageVolume_exponentialBackOffRetryOnIssueErrors(t *testing.T) {
+    expBackOffDuration := 100 * time.Millisecond
+    expBackOffCap := 5 * expBackOffDuration
+    expBackOffFactor := 2.0 // We multiply the 'duration' by 2.0 if the attempt fails/errors
+    expBackOffJitter := 0.0 // No jitter on the 'duration', so we can calculate the number of retries easily
+    expBackOffSteps := 100  // The maximum number of backoff attempts
+    issueRenewalTimeout := expBackOffDuration
+
+    // Expected number of retries in each expBackOff cycle:
+    // ⌈log base expBackOffFactor of (expBackOffCap/expBackOffDuration)⌉
+    var expectNumOfRetries int32 = 3 // ⌈log2(500/100)⌉
+
+    // Because startRenewalRoutine uses ticker := time.NewTicker(time.Second),
+    // 2 seconds should be enough to complete one expBackOff cycle:
+    // ticker start time (1s) + expBackOffCap (0.5s) + expectNumOfRetries (3) * issueRenewalTimeout (0.1s)
+    expectGlobalTimeout := 2 * time.Second
+
+    var numOfRetries int32 = 0 // init
+
+    opts := newDefaultTestOptions(t)
+    opts.RenewalBackoffConfig = &wait.Backoff{
+        Duration: expBackOffDuration,
+        Cap:      expBackOffCap,
+        Factor:   expBackOffFactor,
+        Jitter:   expBackOffJitter,
+        Steps:    expBackOffSteps,
+    }
+    opts.ReadyToRequest = func(meta metadata.Metadata) (bool, string) {
+        // ReadyToRequest will be called by issue()
+        atomic.AddInt32(&numOfRetries, 1) // runs in a goroutine, so increment it atomically
+        return true, "" // AlwaysReadyToRequest
+    }
+    m, err := NewManager(opts)
+    if err != nil {
+        t.Fatal(err)
+    }
+    m.issueRenewalTimeout = issueRenewalTimeout
+
+    // Register a new volume with the metadata store
+    store := opts.MetadataReader.(storage.Interface)
+    meta := metadata.Metadata{
+        VolumeID:   "vol-id",
+        TargetPath: "/fake/path",
+    }
+    store.RegisterMetadata(meta)
+    // Ensure we stop managing the volume after the test
+    defer func() {
+        store.RemoveVolume(meta.VolumeID)
+        m.UnmanageVolume(meta.VolumeID)
+    }()
+
+    // Put the certificate under management
+    managed := m.ManageVolume(meta.VolumeID)
+    if !managed {
+        t.Errorf("expected management to have started, but it did not")
+    }
+
+    time.Sleep(expectGlobalTimeout)
+
+    actualNumOfRetries := atomic.LoadInt32(&numOfRetries) // read atomically
+    if actualNumOfRetries != expectNumOfRetries {
+        t.Errorf("expected %d retries, but got %d", expectNumOfRetries, actualNumOfRetries)
+    }
+}
+
 func TestManager_cleanupStaleRequests(t *testing.T) {
     type fields struct {
         nodeID string
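
As a quick sanity check of the retry arithmetic in the test's comments, the sketch below evaluates ⌈log_Factor(Cap/Duration)⌉ for the values used above, assuming (as the comment states) that one backoff cycle retries until the interval reaches its cap.

package main

import (
    "fmt"
    "math"
)

func main() {
    durationMs := 100.0 // expBackOffDuration
    capMs := 500.0      // expBackOffCap
    factor := 2.0       // expBackOffFactor

    retries := int(math.Ceil(math.Log(capMs/durationMs) / math.Log(factor)))
    fmt.Println(retries) // 3, matching expectNumOfRetries in the test
}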
