Skip to content

Commit c4fd09d

Browse files
authored
Merge pull request kubernetes#89438 from robscott/endpointslice-controller-error-backoff
Lengthening initial backoff time for EndpointSlice controller
2 parents 4ad3268 + 94e5537 commit c4fd09d

File tree

2 files changed

+40
-8
lines changed

2 files changed

+40
-8
lines changed

pkg/controller/endpointslice/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ go_library(
4141
"//staging/src/k8s.io/client-go/tools/record:go_default_library",
4242
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
4343
"//staging/src/k8s.io/component-base/metrics/prometheus/ratelimiter:go_default_library",
44+
"//vendor/golang.org/x/time/rate:go_default_library",
4445
"//vendor/k8s.io/klog:go_default_library",
4546
"//vendor/k8s.io/utils/net:go_default_library",
4647
],

pkg/controller/endpointslice/endpointslice_controller.go

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020
"fmt"
2121
"time"
2222

23+
"golang.org/x/time/rate"
24+
2325
v1 "k8s.io/api/core/v1"
2426
discovery "k8s.io/api/discovery/v1beta1"
2527
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -47,13 +49,24 @@ const (
4749
// maxRetries is the number of times a service will be retried before it is
4850
// dropped out of the queue. Any sync error, such as a failure to create or
4951
// update an EndpointSlice could trigger a retry. With the current
50-
// rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers
51-
// represent the sequence of delays between successive queuings of a
52-
// service.
52+
// rate-limiter in use (1s*2^(numRetries-1)) the following numbers represent
53+
// the sequence of delays between successive queuings of a service.
5354
//
54-
// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s,
55-
// 10.2s, 20.4s, 41s, 82s
55+
// 1s, 2s, 4s, 8s, 16s, 32s, 64s, 100s (max)
5656
maxRetries = 15
57+
58+
// endpointSliceChangeMinSyncDelay indicates the minimum delay before
59+
// queuing a syncService call after an EndpointSlice changes. If
60+
// endpointUpdatesBatchPeriod is greater than this value, it will be used
61+
// instead. This helps batch processing of changes to multiple
62+
// EndpointSlices.
63+
endpointSliceChangeMinSyncDelay = 1 * time.Second
64+
65+
// defaultSyncBackOff is the default backoff period for syncService calls.
66+
defaultSyncBackOff = 1 * time.Second
67+
// maxSyncBackOff is the max backoff period for syncService calls.
68+
maxSyncBackOff = 100 * time.Second
69+
5770
// controllerName is a unique value used with LabelManagedBy to indicate
5871
// the component managing an EndpointSlice.
5972
controllerName = "endpointslice-controller.k8s.io"
@@ -80,8 +93,19 @@ func NewController(podInformer coreinformers.PodInformer,
8093
endpointslicemetrics.RegisterMetrics()
8194

8295
c := &Controller{
83-
client: client,
84-
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "endpoint_slice"),
96+
client: client,
97+
// This is similar to the DefaultControllerRateLimiter, just with a
98+
// significantly higher default backoff (1s vs 5ms). This controller
99+
// processes events that can require significant EndpointSlice changes,
100+
// such as an update to a Service or Deployment. A more significant
101+
// rate limit back off here helps ensure that the Controller does not
102+
// overwhelm the API Server.
103+
queue: workqueue.NewNamedRateLimitingQueue(workqueue.NewMaxOfRateLimiter(
104+
workqueue.NewItemExponentialFailureRateLimiter(defaultSyncBackOff, maxSyncBackOff),
105+
// 10 qps, 100 bucket size. This is only for retry speed and it's
106+
// only the overall factor (not per item).
107+
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)},
108+
), "endpoint_slice"),
85109
workerLoopPeriod: time.Second,
86110
}
87111

@@ -409,7 +433,14 @@ func (c *Controller) queueServiceForEndpointSlice(endpointSlice *discovery.Endpo
409433
utilruntime.HandleError(fmt.Errorf("Couldn't get key for EndpointSlice %+v: %v", endpointSlice, err))
410434
return
411435
}
412-
c.queue.Add(key)
436+
437+
// queue after the max of endpointSliceChangeMinSyncDelay and
438+
// endpointUpdatesBatchPeriod.
439+
delay := endpointSliceChangeMinSyncDelay
440+
if c.endpointUpdatesBatchPeriod > delay {
441+
delay = c.endpointUpdatesBatchPeriod
442+
}
443+
c.queue.AddAfter(key, delay)
413444
}
414445

415446
func (c *Controller) addPod(obj interface{}) {

0 commit comments

Comments
 (0)