@@ -56,6 +56,8 @@ const (
56
56
// Limit is required to avoid memory spikes during cache initialization.
57
57
// The default limit of 50 is chosen based on experiments.
58
58
defaultListSemaphoreWeight = 50
59
+ // defaultListItemSemaphoreWeight limits the number of items processed in parallel for each k8s list.
60
+ defaultListItemSemaphoreWeight = int64 (1 )
59
61
// defaultEventProcessingInterval is the default interval for processing events
60
62
defaultEventProcessingInterval = 100 * time .Millisecond
61
63
)
@@ -164,15 +166,16 @@ type ListRetryFunc func(err error) bool
164
166
func NewClusterCache (config * rest.Config , opts ... UpdateSettingsFunc ) * clusterCache {
165
167
log := textlogger .NewLogger (textlogger .NewConfig ())
166
168
cache := & clusterCache {
167
- settings : Settings {ResourceHealthOverride : & noopSettings {}, ResourcesFilter : & noopSettings {}},
168
- apisMeta : make (map [schema.GroupKind ]* apiMeta ),
169
- eventMetaCh : nil ,
170
- listPageSize : defaultListPageSize ,
171
- listPageBufferSize : defaultListPageBufferSize ,
172
- listSemaphore : semaphore .NewWeighted (defaultListSemaphoreWeight ),
173
- resources : make (map [kube.ResourceKey ]* Resource ),
174
- nsIndex : make (map [string ]map [kube.ResourceKey ]* Resource ),
175
- config : config ,
169
+ settings : Settings {ResourceHealthOverride : & noopSettings {}, ResourcesFilter : & noopSettings {}},
170
+ apisMeta : make (map [schema.GroupKind ]* apiMeta ),
171
+ eventMetaCh : nil ,
172
+ listPageSize : defaultListPageSize ,
173
+ listPageBufferSize : defaultListPageBufferSize ,
174
+ listSemaphore : semaphore .NewWeighted (defaultListSemaphoreWeight ),
175
+ listItemSemaphoreWeight : defaultListItemSemaphoreWeight ,
176
+ resources : make (map [kube.ResourceKey ]* Resource ),
177
+ nsIndex : make (map [string ]map [kube.ResourceKey ]* Resource ),
178
+ config : config ,
176
179
kubectl : & kube.KubectlCmd {
177
180
Log : log ,
178
181
Tracer : tracing.NopTracer {},
@@ -219,8 +222,9 @@ type clusterCache struct {
219
222
// size of a page for list operations pager.
220
223
listPageSize int64
221
224
// number of pages to prefetch for list pager.
222
- listPageBufferSize int32
223
- listSemaphore WeightedSemaphore
225
+ listPageBufferSize int32
226
+ listSemaphore WeightedSemaphore
227
+ listItemSemaphoreWeight int64
224
228
225
229
// retry options for list operations
226
230
listRetryLimit int32
@@ -262,6 +266,35 @@ type clusterCacheSync struct {
262
266
resyncTimeout time.Duration
263
267
}
264
268
269
+ // listItemTaskLimiter limits the amount of list items to process in parallel.
270
+ type listItemTaskLimiter struct {
271
+ sem WeightedSemaphore
272
+ wg sync.WaitGroup
273
+ }
274
+
275
+ // Run executes the given task concurrently, blocking if the pool is at capacity.
276
+ func (t * listItemTaskLimiter ) Run (ctx context.Context , task func ()) error {
277
+ t .wg .Add (1 )
278
+ if err := t .sem .Acquire (ctx , 1 ); err != nil {
279
+ t .wg .Done ()
280
+ return fmt .Errorf ("failed to acquire semaphore: %w" , err )
281
+ }
282
+
283
+ go func () {
284
+ defer t .wg .Done ()
285
+ defer t .sem .Release (1 )
286
+
287
+ task ()
288
+ }()
289
+
290
+ return nil
291
+ }
292
+
293
+ // Wait blocks until all submitted tasks have completed.
294
+ func (t * listItemTaskLimiter ) Wait () {
295
+ t .wg .Wait ()
296
+ }
297
+
265
298
// ListRetryFuncNever never retries on errors
266
299
func ListRetryFuncNever (_ error ) bool {
267
300
return false
@@ -446,6 +479,13 @@ func (c *clusterCache) newResource(un *unstructured.Unstructured) *Resource {
446
479
return resource
447
480
}
448
481
482
+ func (c * clusterCache ) newListItemTaskLimiter () * listItemTaskLimiter {
483
+ return & listItemTaskLimiter {
484
+ sem : semaphore .NewWeighted (c .listItemSemaphoreWeight ),
485
+ wg : sync.WaitGroup {},
486
+ }
487
+ }
488
+
449
489
func (c * clusterCache ) setNode (n * Resource ) {
450
490
key := n .ResourceKey ()
451
491
c .resources [key ] = n
@@ -629,17 +669,33 @@ func (c *clusterCache) listResources(ctx context.Context, resClient dynamic.Reso
629
669
630
670
// loadInitialState loads the state of all the resources retrieved by the given resource client.
631
671
func (c * clusterCache ) loadInitialState (ctx context.Context , api kube.APIResourceInfo , resClient dynamic.ResourceInterface , ns string , lock bool ) (string , error ) {
632
- var items []* Resource
672
+ var (
673
+ items []* Resource
674
+ listLock = sync.Mutex {}
675
+ limiter = c .newListItemTaskLimiter ()
676
+ )
677
+
633
678
resourceVersion , err := c .listResources (ctx , resClient , func (listPager * pager.ListPager ) error {
634
679
return listPager .EachListItem (ctx , metav1.ListOptions {}, func (obj runtime.Object ) error {
635
680
if un , ok := obj .(* unstructured.Unstructured ); ! ok {
636
681
return fmt .Errorf ("object %s/%s has an unexpected type" , un .GroupVersionKind ().String (), un .GetName ())
637
682
} else {
638
- items = append (items , c .newResource (un ))
683
+ if err := limiter .Run (ctx , func () {
684
+ newRes := c .newResource (un )
685
+ listLock .Lock ()
686
+ items = append (items , newRes )
687
+ listLock .Unlock ()
688
+ }); err != nil {
689
+ return fmt .Errorf ("failed to process list item: %w" , err )
690
+ }
639
691
}
640
692
return nil
641
693
})
642
694
})
695
+
696
+ // Wait until all items have completed processing.
697
+ limiter .Wait ()
698
+
643
699
if err != nil {
644
700
return "" , fmt .Errorf ("failed to load initial state of resource %s: %w" , api .GroupKind .String (), err )
645
701
}
@@ -938,19 +994,29 @@ func (c *clusterCache) sync() error {
938
994
lock .Unlock ()
939
995
940
996
return c .processApi (client , api , func (resClient dynamic.ResourceInterface , ns string ) error {
997
+ limiter := c .newListItemTaskLimiter ()
998
+
941
999
resourceVersion , err := c .listResources (ctx , resClient , func (listPager * pager.ListPager ) error {
942
1000
return listPager .EachListItem (context .Background (), metav1.ListOptions {}, func (obj runtime.Object ) error {
943
1001
if un , ok := obj .(* unstructured.Unstructured ); ! ok {
944
1002
return fmt .Errorf ("object %s/%s has an unexpected type" , un .GroupVersionKind ().String (), un .GetName ())
945
1003
} else {
946
- newRes := c .newResource (un )
947
- lock .Lock ()
948
- c .setNode (newRes )
949
- lock .Unlock ()
1004
+ if err := limiter .Run (ctx , func () {
1005
+ newRes := c .newResource (un )
1006
+ lock .Lock ()
1007
+ c .setNode (newRes )
1008
+ lock .Unlock ()
1009
+ }); err != nil {
1010
+ return fmt .Errorf ("failed to process list item: %w" , err )
1011
+ }
950
1012
}
951
1013
return nil
952
1014
})
953
1015
})
1016
+
1017
+ // Wait until all items have completed processing.
1018
+ limiter .Wait ()
1019
+
954
1020
if err != nil {
955
1021
if c .isRestrictedResource (err ) {
956
1022
keep := false
0 commit comments