Skip to content

Commit 7473e64

Browse files
committed
DRA resource slice controller: use MutationCache to avoid race
This avoids the problem of creating an additional slice when the one from the previous sync is not in the informer cache yet. It also avoids false attempts to delete slices which were updated in the previous sync. Such attempts would fail the ResourceVersion precondition check, but would still cause work for the apiserver.
1 parent e88d5c3 commit 7473e64

File tree

2 files changed

+49
-22
lines changed

2 files changed

+49
-22
lines changed

staging/src/k8s.io/dynamic-resource-allocation/resourceslice/resourceslicecontroller.go

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ const (
5151
// poolNameIndex is the name for the ResourceSlice store's index function,
5252
// which is to index by ResourceSlice.Spec.Pool.Name
5353
poolNameIndex = "poolName"
54+
55+
// Including adds in the mutation cache is not safe: We could add a slice, store it,
56+
// and then the slice gets deleted without the informer hearing anything about that.
57+
// Then the obsolete slice remains in the mutation cache.
58+
//
59+
// To mitigate this, we use a TTL and check a pool again once added slices expire.
60+
defaultMutationCacheTTL = time.Minute
5461
)
5562

5663
// Controller synchronizes information about resources of one driver with
@@ -64,8 +71,9 @@ type Controller struct {
6471
kubeClient kubernetes.Interface
6572
wg sync.WaitGroup
6673
// The queue is keyed with the pool name that needs work.
67-
queue workqueue.TypedRateLimitingInterface[string]
68-
sliceStore cache.Indexer
74+
queue workqueue.TypedRateLimitingInterface[string]
75+
sliceStore cache.MutationCache
76+
mutationCacheTTL time.Duration
6977

7078
// Must use atomic access...
7179
numCreates int64
@@ -182,6 +190,10 @@ type Options struct {
182190

183191
// Queue can be used to override the default work queue implementation.
184192
Queue workqueue.TypedRateLimitingInterface[string]
193+
194+
// MutationCacheTTL can be used to change the default TTL of one minute.
195+
// See source code for details.
196+
MutationCacheTTL *time.Duration
185197
}
186198

187199
// Stop cancels all background activity and blocks until the controller has stopped.
@@ -249,20 +261,23 @@ func newController(ctx context.Context, options Options) (*Controller, error) {
249261
ctx, cancel := context.WithCancelCause(ctx)
250262

251263
c := &Controller{
252-
cancel: cancel,
253-
kubeClient: options.KubeClient,
254-
driverName: options.DriverName,
255-
owner: options.Owner.DeepCopy(),
256-
queue: options.Queue,
257-
resources: options.Resources,
264+
cancel: cancel,
265+
kubeClient: options.KubeClient,
266+
driverName: options.DriverName,
267+
owner: options.Owner.DeepCopy(),
268+
queue: options.Queue,
269+
resources: options.Resources,
270+
mutationCacheTTL: defaultMutationCacheTTL,
258271
}
259272
if c.queue == nil {
260273
c.queue = workqueue.NewTypedRateLimitingQueueWithConfig(
261274
workqueue.DefaultTypedControllerRateLimiter[string](),
262275
workqueue.TypedRateLimitingQueueConfig[string]{Name: "node_resource_slices"},
263276
)
264277
}
265-
278+
if options.MutationCacheTTL != nil {
279+
c.mutationCacheTTL = *options.MutationCacheTTL
280+
}
266281
if err := c.initInformer(ctx); err != nil {
267282
return nil, err
268283
}
@@ -298,7 +313,7 @@ func (c *Controller) initInformer(ctx context.Context) error {
298313
}, func(options *metav1.ListOptions) {
299314
options.FieldSelector = selector.String()
300315
})
301-
c.sliceStore = informer.GetIndexer()
316+
c.sliceStore = cache.NewIntegerResourceVersionMutationCache(informer.GetStore(), informer.GetIndexer(), c.mutationCacheTTL, true /* includeAdds */)
302317
handler, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
303318
AddFunc: func(obj any) {
304319
slice, ok := obj.(*resourceapi.ResourceSlice)
@@ -562,13 +577,16 @@ func (c *Controller) syncPool(ctx context.Context, poolName string) error {
562577
slice.Spec.Devices = pool.Slices[i].Devices
563578

564579
logger.V(5).Info("Updating existing resource slice", "slice", klog.KObj(slice))
565-
if _, err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Update(ctx, slice, metav1.UpdateOptions{}); err != nil {
580+
slice, err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Update(ctx, slice, metav1.UpdateOptions{})
581+
if err != nil {
566582
return fmt.Errorf("update resource slice: %w", err)
567583
}
568584
atomic.AddInt64(&c.numUpdates, 1)
585+
c.sliceStore.Mutation(slice)
569586
}
570587

571588
// Create new slices.
589+
added := false
572590
for i := 0; i < len(pool.Slices); i++ {
573591
if _, ok := currentSliceForDesiredSlice[i]; ok {
574592
// Was handled above through an update.
@@ -608,21 +626,25 @@ func (c *Controller) syncPool(ctx context.Context, poolName string) error {
608626
// It can happen that we create a missing slice, some
609627
// other change than the create causes another sync of
610628
// the pool, and then a second slice for the same set
611-
// of devices gets created because the controller has
629+
// of devices would get created because the controller has
612630
// no copy of the first slice instance in its informer
613631
// cache yet.
614632
//
615-
// This is not a problem: a client will either see one
616-
// of the two slices and use it or see both and do
617-
// nothing because of the duplicated device IDs.
618-
//
619-
// To avoid creating a second slice, we would have to use a
620-
// https://pkg.go.dev/k8s.io/client-go/tools/cache#MutationCache.
633+
// Using a https://pkg.go.dev/k8s.io/client-go/tools/cache#MutationCache
634+
// avoids that.
621635
logger.V(5).Info("Creating new resource slice")
622-
if _, err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{}); err != nil {
636+
slice, err := c.kubeClient.ResourceV1alpha3().ResourceSlices().Create(ctx, slice, metav1.CreateOptions{})
637+
if err != nil {
623638
return fmt.Errorf("create resource slice: %w", err)
624639
}
625640
atomic.AddInt64(&c.numCreates, 1)
641+
c.sliceStore.Mutation(slice)
642+
added = true
643+
}
644+
if added {
645+
// Check that the recently added slice(s) really exist even
646+
// after they expired from the mutation cache.
647+
c.queue.AddAfter(poolName, c.mutationCacheTTL)
626648
}
627649
} else if len(slices) > 0 {
628650
// All are obsolete, pool does not exist anymore.

staging/src/k8s.io/dynamic-resource-allocation/resourceslice/resourceslicecontroller_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -674,9 +674,14 @@ func TestControllerSyncPool(t *testing.T) {
674674
assert.Equal(t, test.expectedStats, ctrl.GetStats())
675675

676676
// The informer might have added a work item after ctrl.run returned.
677-
state := queue.State()
678-
state.Ready = nil
679-
assert.Equal(t, workqueue.MockState[string]{}, state)
677+
actualState := queue.State()
678+
actualState.Ready = nil
679+
var expectState workqueue.MockState[string]
680+
if test.expectedStats.NumCreates > 0 {
681+
expectState.Later = []workqueue.MockDelayedItem[string]{{Item: poolName, Duration: defaultMutationCacheTTL}}
682+
}
683+
assert.Equal(t, expectState, actualState)
684+
680685
})
681686
}
682687
}

0 commit comments

Comments (0)