@@ -75,6 +75,9 @@ const (
 	// controllerName is a unique value used with LabelManagedBy to indicated
 	// the component managing an EndpointSlice.
 	controllerName = "endpointslice-controller.k8s.io"
+
+	// topologyQueueItemKey is the key for all items in the topologyQueue.
+	topologyQueueItemKey = "topologyQueueItemKey"
 )
 
 // NewController creates and initializes a new Controller
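The new constant is a sentinel key: every node event is enqueued under the same string, so the workqueue's per-key deduplication collapses a burst of node changes into at most one pending topology sync. Below is a minimal, hypothetical sketch (not part of this PR; only the queue calls mirror the code) of that coalescing behavior, and it is also why Run starts exactly one topologyQueueWorker later in the diff.

// A sketch of workqueue coalescing under a single sentinel key. The package
// main scaffolding is illustrative; only the queue calls mirror the PR.
package main

import (
	"fmt"

	"k8s.io/client-go/util/workqueue"
)

const topologyQueueItemKey = "topologyQueueItemKey"

func main() {
	q := workqueue.NewTypedRateLimitingQueue[string](
		workqueue.DefaultTypedControllerRateLimiter[string](),
	)
	defer q.ShutDown()

	// Simulate a burst of node add/update/delete notifications.
	for i := 0; i < 3; i++ {
		q.Add(topologyQueueItemKey)
	}

	// Identical keys are deduplicated, so the burst leaves one pending item.
	fmt.Println(q.Len()) // 1

	key, _ := q.Get() // a worker would recompute topology here
	q.Done(key)
}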
@@ -99,7 +102,7 @@ func NewController(ctx context.Context, podInformer coreinformers.PodInformer,
 		// such as an update to a Service or Deployment. A more significant
 		// rate limit back off here helps ensure that the Controller does not
 		// overwhelm the API Server.
-		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
+		serviceQueue: workqueue.NewTypedRateLimitingQueueWithConfig(
			workqueue.NewTypedMaxOfRateLimiter(
				workqueue.NewTypedItemExponentialFailureRateLimiter[string](defaultSyncBackOff, maxSyncBackOff),
				// 10 qps, 100 bucket size. This is only for retry speed and its
@@ -110,6 +113,9 @@ func NewController(ctx context.Context, podInformer coreinformers.PodInformer,
				Name: "endpoint_slice",
			},
		),
+		topologyQueue: workqueue.NewTypedRateLimitingQueue[string](
+			workqueue.DefaultTypedControllerRateLimiter[string](),
+		),
 		workerLoopPeriod: time.Second,
 	}
 
@@ -158,14 +164,14 @@ func NewController(ctx context.Context, podInformer coreinformers.PodInformer,
 
 	if utilfeature.DefaultFeatureGate.Enabled(features.TopologyAwareHints) {
 		nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-			AddFunc: func(obj interface{}) {
-				c.addNode(logger, obj)
+			AddFunc: func(_ interface{}) {
+				c.addNode()
 			},
 			UpdateFunc: func(oldObj, newObj interface{}) {
-				c.updateNode(logger, oldObj, newObj)
+				c.updateNode(oldObj, newObj)
 			},
-			DeleteFunc: func(obj interface{}) {
-				c.deleteNode(logger, obj)
+			DeleteFunc: func(_ interface{}) {
+				c.deleteNode()
 			},
 		})
 
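The handlers no longer receive the logger or the event payload; they only enqueue work. A minimal, hypothetical sketch of this enqueue-only handler pattern follows (a fake clientset and informer factory stand in for the controller's real dependencies, itemKey plays the role of topologyQueueItemKey, and it omits the readiness/zone-label filtering that updateNode keeps):

// Hypothetical scaffolding: fake clientset, informer factory, and itemKey are
// illustrative stand-ins, not the controller's actual fields.
package main

import (
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"
)

func main() {
	client := fake.NewSimpleClientset()
	factory := informers.NewSharedInformerFactory(client, 0)
	nodeInformer := factory.Core().V1().Nodes()

	queue := workqueue.NewTypedRateLimitingQueue[string](
		workqueue.DefaultTypedControllerRateLimiter[string](),
	)
	const itemKey = "topology"

	// Handlers ignore the event payload and only enqueue the sentinel key, so
	// they return immediately and never block informer event delivery.
	// (The real updateNode additionally filters on readiness and zone-label changes.)
	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    func(_ interface{}) { queue.Add(itemKey) },
		UpdateFunc: func(_, _ interface{}) { queue.Add(itemKey) },
		DeleteFunc: func(_ interface{}) { queue.Add(itemKey) },
	})

	// A single worker goroutine would normally drain the queue here,
	// as the Run changes below do for the real controller.
	queue.ShutDown()
}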
@@ -236,7 +242,11 @@ type Controller struct {
 	// more often than services with few pods; it also would cause a
 	// service that's inserted multiple times to be processed more than
 	// necessary.
-	queue workqueue.TypedRateLimitingInterface[string]
+	serviceQueue workqueue.TypedRateLimitingInterface[string]
+
+	// topologyQueue is used to trigger a topology cache update and checking node
+	// topology distribution.
+	topologyQueue workqueue.TypedRateLimitingInterface[string]
 
 	// maxEndpointsPerSlice references the maximum number of endpoints that
 	// should be added to an EndpointSlice
@@ -264,7 +274,8 @@ func (c *Controller) Run(ctx context.Context, workers int) {
 	c.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: c.client.CoreV1().Events("")})
 	defer c.eventBroadcaster.Shutdown()
 
-	defer c.queue.ShutDown()
+	defer c.serviceQueue.ShutDown()
+	defer c.topologyQueue.ShutDown()
 
 	logger := klog.FromContext(ctx)
 	logger.Info("Starting endpoint slice controller")
@@ -274,52 +285,69 @@ func (c *Controller) Run(ctx context.Context, workers int) {
 		return
 	}
 
-	logger.V(2).Info("Starting worker threads", "total", workers)
+	logger.V(2).Info("Starting service queue worker threads", "total", workers)
 	for i := 0; i < workers; i++ {
-		go wait.Until(func() { c.worker(logger) }, c.workerLoopPeriod, ctx.Done())
+		go wait.Until(func() { c.serviceQueueWorker(logger) }, c.workerLoopPeriod, ctx.Done())
 	}
+	logger.V(2).Info("Starting topology queue worker threads", "total", 1)
+	go wait.Until(func() { c.topologyQueueWorker(logger) }, c.workerLoopPeriod, ctx.Done())
 
 	<-ctx.Done()
 }
 
-// worker runs a worker thread that just dequeues items, processes them, and
-// marks them done. You may run as many of these in parallel as you wish; the
-// workqueue guarantees that they will not end up processing the same service
-// at the same time
-func (c *Controller) worker(logger klog.Logger) {
-	for c.processNextWorkItem(logger) {
+// serviceQueueWorker runs a worker thread that just dequeues items, processes
+// them, and marks them done. You may run as many of these in parallel as you
+// wish; the workqueue guarantees that they will not end up processing the same
+// service at the same time
+func (c *Controller) serviceQueueWorker(logger klog.Logger) {
+	for c.processNextServiceWorkItem(logger) {
 	}
 }
 
-func (c *Controller) processNextWorkItem(logger klog.Logger) bool {
-	cKey, quit := c.queue.Get()
+func (c *Controller) processNextServiceWorkItem(logger klog.Logger) bool {
+	cKey, quit := c.serviceQueue.Get()
 	if quit {
 		return false
 	}
-	defer c.queue.Done(cKey)
+	defer c.serviceQueue.Done(cKey)
 
 	err := c.syncService(logger, cKey)
 	c.handleErr(logger, err, cKey)
 
 	return true
 }
 
+func (c *Controller) topologyQueueWorker(logger klog.Logger) {
+	for c.processNextTopologyWorkItem(logger) {
+	}
+}
+
+func (c *Controller) processNextTopologyWorkItem(logger klog.Logger) bool {
+	key, quit := c.topologyQueue.Get()
+	if quit {
+		return false
+	}
+	defer c.topologyQueue.Done(key)
+	c.checkNodeTopologyDistribution(logger)
+	return true
+}
+
 func (c *Controller) handleErr(logger klog.Logger, err error, key string) {
 	trackSync(err)
 
 	if err == nil {
-		c.queue.Forget(key)
+		c.serviceQueue.Forget(key)
 		return
 	}
 
-	if c.queue.NumRequeues(key) < maxRetries {
+	if c.serviceQueue.NumRequeues(key) < maxRetries {
 		logger.Info("Error syncing endpoint slices for service, retrying", "key", key, "err", err)
-		c.queue.AddRateLimited(key)
+		c.serviceQueue.AddRateLimited(key)
 		return
 	}
 
 	logger.Info("Retry budget exceeded, dropping service out of the queue", "key", key, "err", err)
-	c.queue.Forget(key)
+	c.serviceQueue.Forget(key)
 	utilruntime.HandleError(err)
 }
 
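Both workers follow the standard client-go workqueue loop: Get an item, process it, and mark it Done, with handleErr applying rate-limited retries up to a budget before dropping the key. A minimal, generic sketch of that loop under assumed values follows (syncFn and the maxRetries value here are illustrative, not copied from this file; only the queue method calls and the Forget/AddRateLimited structure mirror the controller):

// A generic version of the dequeue/sync/retry loop. syncFn and the maxRetries
// value are assumptions for illustration, not taken from the controller.
package main

import (
	"fmt"

	"k8s.io/client-go/util/workqueue"
)

const maxRetries = 15 // assumed retry budget

func processNext(q workqueue.TypedRateLimitingInterface[string], syncFn func(string) error) bool {
	key, quit := q.Get()
	if quit {
		return false
	}
	// Done must always be called so the key can be processed again later.
	defer q.Done(key)

	err := syncFn(key)
	if err == nil {
		q.Forget(key) // success: reset the per-key backoff counter
		return true
	}
	if q.NumRequeues(key) < maxRetries {
		q.AddRateLimited(key) // retry later with exponential backoff
		return true
	}
	q.Forget(key) // retry budget exhausted: drop the key
	return true
}

func main() {
	q := workqueue.NewTypedRateLimitingQueue[string](
		workqueue.DefaultTypedControllerRateLimiter[string](),
	)
	q.Add("default/my-service")
	processNext(q, func(key string) error {
		fmt.Println("synced", key)
		return nil
	})
	q.ShutDown()
}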
@@ -416,7 +444,7 @@ func (c *Controller) onServiceUpdate(obj interface{}) {
 		return
 	}
 
-	c.queue.Add(key)
+	c.serviceQueue.Add(key)
 }
 
 // onServiceDelete removes the Service Selector from the cache and queues the Service for processing.
@@ -427,7 +455,7 @@ func (c *Controller) onServiceDelete(obj interface{}) {
 		return
 	}
 
-	c.queue.Add(key)
+	c.serviceQueue.Add(key)
 }
 
 // onEndpointSliceAdd queues a sync for the relevant Service for a sync if the
@@ -500,7 +528,7 @@ func (c *Controller) queueServiceForEndpointSlice(endpointSlice *discovery.Endpo
 	if c.endpointUpdatesBatchPeriod > delay {
 		delay = c.endpointUpdatesBatchPeriod
 	}
-	c.queue.AddAfter(key, delay)
+	c.serviceQueue.AddAfter(key, delay)
 }
 
 func (c *Controller) addPod(obj interface{}) {
@@ -511,14 +539,14 @@ func (c *Controller) addPod(obj interface{}) {
 		return
 	}
 	for key := range services {
-		c.queue.AddAfter(key, c.endpointUpdatesBatchPeriod)
+		c.serviceQueue.AddAfter(key, c.endpointUpdatesBatchPeriod)
 	}
 }
 
 func (c *Controller) updatePod(old, cur interface{}) {
 	services := endpointsliceutil.GetServicesToUpdateOnPodChange(c.serviceLister, old, cur)
 	for key := range services {
-		c.queue.AddAfter(key, c.endpointUpdatesBatchPeriod)
+		c.serviceQueue.AddAfter(key, c.endpointUpdatesBatchPeriod)
 	}
 }
 
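queueServiceForEndpointSlice, addPod, and updatePod above all enqueue the service key with AddAfter, which lets the delaying queue absorb a burst of related pod and endpoint events into a single sync per batch window. A minimal, hypothetical sketch of that coalescing (the service key and batch period are illustrative; the real endpointUpdatesBatchPeriod is configured elsewhere):

// A sketch of AddAfter-based batching; the key and batch period are
// illustrative stand-ins for the controller's endpointUpdatesBatchPeriod.
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	q := workqueue.NewTypedRateLimitingQueue[string](
		workqueue.DefaultTypedControllerRateLimiter[string](),
	)
	defer q.ShutDown()

	const batchPeriod = 100 * time.Millisecond // assumed batch window
	key := "default/my-service"

	// Three pod events for the same Service arrive within the batch window.
	q.AddAfter(key, batchPeriod)
	q.AddAfter(key, batchPeriod)
	q.AddAfter(key, batchPeriod)

	time.Sleep(2 * batchPeriod) // wait for the delayed item to become ready

	// The delaying queue deduplicates the key, so the burst yields one sync.
	fmt.Println(q.Len()) // 1
}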
@@ -531,24 +559,24 @@ func (c *Controller) deletePod(obj interface{}) {
 	}
 }
 
-func (c *Controller) addNode(logger klog.Logger, obj interface{}) {
-	c.checkNodeTopologyDistribution(logger)
+func (c *Controller) addNode() {
+	c.topologyQueue.Add(topologyQueueItemKey)
 }
 
-func (c *Controller) updateNode(logger klog.Logger, old, cur interface{}) {
+func (c *Controller) updateNode(old, cur interface{}) {
 	oldNode := old.(*v1.Node)
 	curNode := cur.(*v1.Node)
 
 	// LabelTopologyZone may be added by cloud provider asynchronously after the Node is created.
 	// The topology cache should be updated in this case.
 	if isNodeReady(oldNode) != isNodeReady(curNode) ||
 		oldNode.Labels[v1.LabelTopologyZone] != curNode.Labels[v1.LabelTopologyZone] {
-		c.checkNodeTopologyDistribution(logger)
+		c.topologyQueue.Add(topologyQueueItemKey)
 	}
 }
 
-func (c *Controller) deleteNode(logger klog.Logger, obj interface{}) {
-	c.checkNodeTopologyDistribution(logger)
+func (c *Controller) deleteNode() {
+	c.topologyQueue.Add(topologyQueueItemKey)
 }
 
 // checkNodeTopologyDistribution updates Nodes in the topology cache and then
@@ -566,7 +594,7 @@ func (c *Controller) checkNodeTopologyDistribution(logger klog.Logger) {
 	serviceKeys := c.topologyCache.GetOverloadedServices()
 	for _, serviceKey := range serviceKeys {
 		logger.V(2).Info("Queuing Service after Node change due to overloading", "key", serviceKey)
-		c.queue.Add(serviceKey)
+		c.serviceQueue.Add(serviceKey)
 	}
 }