Skip to content

Commit 8dddb5e

Browse files
authored
fix: backoff retry and timeouts in HNS restart
Signed-off-by: Evan Baker <[email protected]>
1 parent 9bab884 commit 8dddb5e

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

platform/os_windows.go

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -308,13 +308,15 @@ func restartHNS(ctx context.Context) error {
308308
tryStopServiceFn(ctx, service),
309309
retry.UntilSucceeded(),
310310
retry.Context(ctx),
311+
retry.DelayType(retry.BackOffDelay),
311312
)
312313
// Start the service again
313314
log.Printf("Starting HNS service")
314315
_ = retry.Do(
315316
tryStartServiceFn(ctx, service),
316317
retry.UntilSucceeded(),
317318
retry.Context(ctx),
319+
retry.DelayType(retry.BackOffDelay),
318320
)
319321
log.Printf("HNS service started")
320322
return nil
@@ -330,6 +332,14 @@ func tryStartServiceFn(ctx context.Context, service managedService) func() error
330332
shouldStart := func(state svc.State) bool {
331333
return !(state == svc.Running || state == svc.StartPending)
332334
}
335+
var n, max time.Duration = 0, 3
336+
deadline := func(parent context.Context) (context.Context, context.CancelFunc) {
337+
n++
338+
if n > max {
339+
n = max
340+
}
341+
return context.WithTimeout(parent, n*30*time.Second)
342+
}
333343
return func() error {
334344
status, err := service.Query()
335345
if err != nil {
@@ -342,6 +352,8 @@ func tryStartServiceFn(ctx context.Context, service managedService) func() error
342352
}
343353
}
344354
// Wait for the service to start
355+
deadline, cancel := deadline(ctx)
356+
defer cancel()
345357
ticker := time.NewTicker(500 * time.Millisecond) //nolint:gomnd // 500ms
346358
defer ticker.Stop()
347359
for {
@@ -354,8 +366,8 @@ func tryStartServiceFn(ctx context.Context, service managedService) func() error
354366
break
355367
}
356368
select {
357-
case <-ctx.Done():
358-
return errors.Wrap(ctx.Err(), "context cancelled")
369+
case <-deadline.Done():
370+
return errors.Wrap(deadline.Err(), "context cancelled")
359371
case <-ticker.C:
360372
}
361373
}
@@ -367,6 +379,14 @@ func tryStopServiceFn(ctx context.Context, service managedService) func() error
367379
shouldStop := func(state svc.State) bool {
368380
return !(state == svc.Stopped || state == svc.StopPending)
369381
}
382+
var n, max time.Duration = 0, 3
383+
deadline := func(parent context.Context) (context.Context, context.CancelFunc) {
384+
n++
385+
if n > max {
386+
n = max
387+
}
388+
return context.WithTimeout(parent, n*30*time.Second)
389+
}
370390
return func() error {
371391
status, err := service.Query()
372392
if err != nil {
@@ -379,6 +399,8 @@ func tryStopServiceFn(ctx context.Context, service managedService) func() error
379399
}
380400
}
381401
// Wait for the service to stop
402+
deadline, cancel := deadline(ctx)
403+
defer cancel()
382404
ticker := time.NewTicker(500 * time.Millisecond) //nolint:gomnd // 500ms
383405
defer ticker.Stop()
384406
for {
@@ -391,8 +413,8 @@ func tryStopServiceFn(ctx context.Context, service managedService) func() error
391413
break
392414
}
393415
select {
394-
case <-ctx.Done():
395-
return errors.Wrap(ctx.Err(), "context cancelled")
416+
case <-deadline.Done():
417+
return errors.Wrap(deadline.Err(), "context cancelled")
396418
case <-ticker.C:
397419
}
398420
}

0 commit comments

Comments
 (0)