Skip to content

Commit 45607b8

Browse files
committed
fix: make deletion idempotent
- Don't error when a delete returns NotFound. This means the object was deleted asynchronously by another client between the initial GET for task scheduling and the delete task itself. Log a warning, because it's possible the deletion may have violated dependency ordering. - Update apply and delte task logs to only log errors if the verbosity is greater than -v=4. This should help reduce duplicate log messages.
1 parent 0cb95ee commit 45607b8

File tree

3 files changed

+85
-39
lines changed

3 files changed

+85
-39
lines changed

pkg/apply/prune/prune.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ func (p *Pruner) Prune(
103103
uid := obj.GetUID()
104104
if uid == "" {
105105
err := object.NotFound([]interface{}{"metadata", "uid"}, "")
106+
if klog.V(4).Enabled() {
107+
// only log event emitted errors if the verbosity > 4
108+
klog.Errorf("prune uid lookup errored (object: %s): %v", id, err)
109+
}
106110
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
107111
taskContext.InventoryManager().AddFailedDelete(id)
108112
continue
@@ -116,7 +120,8 @@ func (p *Pruner) Prune(
116120
if filterErr != nil {
117121
var fatalErr *filter.FatalError
118122
if errors.As(filterErr, &fatalErr) {
119-
if klog.V(5).Enabled() {
123+
if klog.V(4).Enabled() {
124+
// only log event emitted errors if the verbosity > 4
120125
klog.Errorf("prune filter errored (filter: %s, object: %s): %v", pruneFilter.Name(), id, fatalErr.Err)
121126
}
122127
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, fatalErr.Err))
@@ -134,6 +139,7 @@ func (p *Pruner) Prune(
134139
obj, err = p.removeInventoryAnnotation(obj)
135140
if err != nil {
136141
if klog.V(4).Enabled() {
142+
// only log event emitted errors if the verbosity > 4
137143
klog.Errorf("error removing annotation (object: %q, annotation: %q): %v", id, inventory.OwningInventoryKey, err)
138144
}
139145
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
@@ -167,12 +173,18 @@ func (p *Pruner) Prune(
167173
PropagationPolicy: &opts.PropagationPolicy,
168174
})
169175
if err != nil {
170-
if klog.V(4).Enabled() {
171-
klog.Errorf("error deleting object (object: %q): %v", id, err)
176+
if apierrors.IsNotFound(err) {
177+
klog.Warningf("error deleting object (object: %q): object not found: object may have been deleted asynchronously by another client", id)
178+
// treat this as successful idempotent deletion
179+
} else {
180+
if klog.V(4).Enabled() {
181+
// only log event emitted errors if the verbosity > 4
182+
klog.Errorf("error deleting object (object: %q): %v", id, err)
183+
}
184+
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
185+
taskContext.InventoryManager().AddFailedDelete(id)
186+
continue
172187
}
173-
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
174-
taskContext.InventoryManager().AddFailedDelete(id)
175-
continue
176188
}
177189
}
178190
taskContext.InventoryManager().AddSuccessfulDelete(id, obj.GetUID())

pkg/apply/prune/prune_test.go

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ var (
184184

185185
func TestPrune(t *testing.T) {
186186
tests := map[string]struct {
187+
clusterObjs []*unstructured.Unstructured
187188
pruneObjs []*unstructured.Unstructured
188189
pruneFilters []filter.ValidationFilter
189190
options Options
@@ -193,13 +194,15 @@ func TestPrune(t *testing.T) {
193194
expectedAbandoned object.ObjMetadataSet
194195
}{
195196
"No pruned objects; no prune/delete events": {
197+
clusterObjs: []*unstructured.Unstructured{},
196198
pruneObjs: []*unstructured.Unstructured{},
197199
options: defaultOptions,
198200
expectedEvents: nil,
199201
},
200202
"One successfully pruned object": {
201-
pruneObjs: []*unstructured.Unstructured{pod},
202-
options: defaultOptions,
203+
clusterObjs: []*unstructured.Unstructured{pod},
204+
pruneObjs: []*unstructured.Unstructured{pod},
205+
options: defaultOptions,
203206
expectedEvents: []event.Event{
204207
{
205208
Type: event.PruneType,
@@ -212,8 +215,9 @@ func TestPrune(t *testing.T) {
212215
},
213216
},
214217
"Multiple successfully pruned object": {
215-
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
216-
options: defaultOptions,
218+
clusterObjs: []*unstructured.Unstructured{pod, pdb, namespace},
219+
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
220+
options: defaultOptions,
217221
expectedEvents: []event.Event{
218222
{
219223
Type: event.PruneType,
@@ -242,8 +246,9 @@ func TestPrune(t *testing.T) {
242246
},
243247
},
244248
"One successfully deleted object": {
245-
pruneObjs: []*unstructured.Unstructured{pod},
246-
options: defaultOptionsDestroy,
249+
clusterObjs: []*unstructured.Unstructured{pod},
250+
pruneObjs: []*unstructured.Unstructured{pod},
251+
options: defaultOptionsDestroy,
247252
expectedEvents: []event.Event{
248253
{
249254
Type: event.DeleteType,
@@ -256,8 +261,9 @@ func TestPrune(t *testing.T) {
256261
},
257262
},
258263
"Multiple successfully deleted objects": {
259-
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
260-
options: defaultOptionsDestroy,
264+
clusterObjs: []*unstructured.Unstructured{pod, pdb, namespace},
265+
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
266+
options: defaultOptionsDestroy,
261267
expectedEvents: []event.Event{
262268
{
263269
Type: event.DeleteType,
@@ -286,8 +292,9 @@ func TestPrune(t *testing.T) {
286292
},
287293
},
288294
"Client dry run still pruned event": {
289-
pruneObjs: []*unstructured.Unstructured{pod},
290-
options: clientDryRunOptions,
295+
clusterObjs: []*unstructured.Unstructured{pod},
296+
pruneObjs: []*unstructured.Unstructured{pod},
297+
options: clientDryRunOptions,
291298
expectedEvents: []event.Event{
292299
{
293300
Type: event.PruneType,
@@ -300,7 +307,8 @@ func TestPrune(t *testing.T) {
300307
},
301308
},
302309
"Server dry run still deleted event": {
303-
pruneObjs: []*unstructured.Unstructured{pod},
310+
clusterObjs: []*unstructured.Unstructured{pod},
311+
pruneObjs: []*unstructured.Unstructured{pod},
304312
options: Options{
305313
DryRunStrategy: common.DryRunServer,
306314
PropagationPolicy: metav1.DeletePropagationBackground,
@@ -318,7 +326,8 @@ func TestPrune(t *testing.T) {
318326
},
319327
},
320328
"UID match means prune skipped": {
321-
pruneObjs: []*unstructured.Unstructured{pod},
329+
clusterObjs: []*unstructured.Unstructured{pod},
330+
pruneObjs: []*unstructured.Unstructured{pod},
322331
pruneFilters: []filter.ValidationFilter{
323332
filter.CurrentUIDFilter{
324333
// Add pod UID to set of current UIDs
@@ -344,7 +353,8 @@ func TestPrune(t *testing.T) {
344353
},
345354
},
346355
"UID match for only one object one pruned, one skipped": {
347-
pruneObjs: []*unstructured.Unstructured{pod, pdb},
356+
clusterObjs: []*unstructured.Unstructured{pod, pdb},
357+
pruneObjs: []*unstructured.Unstructured{pod, pdb},
348358
pruneFilters: []filter.ValidationFilter{
349359
filter.CurrentUIDFilter{
350360
// Add pod UID to set of current UIDs
@@ -378,6 +388,10 @@ func TestPrune(t *testing.T) {
378388
},
379389
},
380390
"Prevent delete annotation equals prune skipped": {
391+
clusterObjs: []*unstructured.Unstructured{
392+
podDeletionPrevention,
393+
testutil.Unstructured(t, pdbDeletePreventionManifest),
394+
},
381395
pruneObjs: []*unstructured.Unstructured{
382396
podDeletionPrevention,
383397
testutil.Unstructured(t, pdbDeletePreventionManifest),
@@ -422,6 +436,10 @@ func TestPrune(t *testing.T) {
422436
},
423437
},
424438
"Prevent delete annotation equals delete skipped": {
439+
clusterObjs: []*unstructured.Unstructured{
440+
podDeletionPrevention,
441+
testutil.Unstructured(t, pdbDeletePreventionManifest),
442+
},
425443
pruneObjs: []*unstructured.Unstructured{
426444
podDeletionPrevention,
427445
testutil.Unstructured(t, pdbDeletePreventionManifest),
@@ -466,6 +484,7 @@ func TestPrune(t *testing.T) {
466484
},
467485
},
468486
"Prevent delete annotation, one skipped, one pruned": {
487+
clusterObjs: []*unstructured.Unstructured{podDeletionPrevention, pod},
469488
pruneObjs: []*unstructured.Unstructured{podDeletionPrevention, pod},
470489
pruneFilters: []filter.ValidationFilter{filter.PreventRemoveFilter{}},
471490
options: defaultOptions,
@@ -500,7 +519,8 @@ func TestPrune(t *testing.T) {
500519
},
501520
},
502521
"Namespace prune skipped": {
503-
pruneObjs: []*unstructured.Unstructured{namespace},
522+
clusterObjs: []*unstructured.Unstructured{namespace},
523+
pruneObjs: []*unstructured.Unstructured{namespace},
504524
pruneFilters: []filter.ValidationFilter{
505525
filter.LocalNamespacesFilter{
506526
LocalNamespaces: sets.NewString(namespace.GetName()),
@@ -524,19 +544,34 @@ func TestPrune(t *testing.T) {
524544
object.UnstructuredToObjMetadata(namespace),
525545
},
526546
},
547+
"Deletion of already deleted object": {
548+
clusterObjs: []*unstructured.Unstructured{},
549+
pruneObjs: []*unstructured.Unstructured{pod},
550+
options: defaultOptionsDestroy,
551+
expectedEvents: []event.Event{
552+
{
553+
Type: event.DeleteType,
554+
DeleteEvent: event.DeleteEvent{
555+
Identifier: object.UnstructuredToObjMetadata(pod),
556+
Status: event.DeleteSuccessful,
557+
Object: pod,
558+
},
559+
},
560+
},
561+
},
527562
}
528563

529564
for name, tc := range tests {
530565
t.Run(name, func(t *testing.T) {
531566
// Set up the fake dynamic client to recognize all objects, and the RESTMapper.
532-
objs := make([]runtime.Object, 0, len(tc.pruneObjs))
533-
for _, obj := range tc.pruneObjs {
534-
objs = append(objs, obj)
567+
clusterObjs := make([]runtime.Object, 0, len(tc.clusterObjs))
568+
for _, obj := range tc.clusterObjs {
569+
clusterObjs = append(clusterObjs, obj)
535570
}
536571
pruneIds := object.UnstructuredSetToObjMetadataSet(tc.pruneObjs)
537572
po := Pruner{
538573
InvClient: inventory.NewFakeClient(pruneIds),
539-
Client: fake.NewSimpleDynamicClient(scheme.Scheme, objs...),
574+
Client: fake.NewSimpleDynamicClient(scheme.Scheme, clusterObjs...),
540575
Mapper: testrestmapper.TestOnlyStaticRESTMapper(scheme.Scheme,
541576
scheme.Scheme.PrioritizedVersionsAllGroups()...),
542577
}

pkg/apply/task/apply_task.go

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,12 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
9999
obj = info.Object.(*unstructured.Unstructured)
100100
id := object.UnstructuredToObjMetadata(obj)
101101
if err != nil {
102+
err = applyerror.NewUnknownTypeError(err)
102103
if klog.V(4).Enabled() {
103-
klog.Errorf("unable to convert obj to info for %s/%s (%s)--continue",
104-
obj.GetNamespace(), obj.GetName(), err)
104+
// only log event emitted errors if the verbosity > 4
105+
klog.Errorf("apply task errored (object: %s): unable to convert obj to info: %v", id, err)
105106
}
106-
taskContext.SendEvent(a.createApplyFailedEvent(
107-
id,
108-
applyerror.NewUnknownTypeError(err),
109-
))
107+
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
110108
taskContext.InventoryManager().AddFailedApply(id)
111109
continue
112110
}
@@ -119,7 +117,8 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
119117
if filterErr != nil {
120118
var fatalErr *filter.FatalError
121119
if errors.As(filterErr, &fatalErr) {
122-
if klog.V(5).Enabled() {
120+
if klog.V(4).Enabled() {
121+
// only log event emitted errors if the verbosity > 4
123122
klog.Errorf("apply filter errored (filter: %s, object: %s): %v", applyFilter.Name(), id, fatalErr.Err)
124123
}
125124
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
@@ -139,8 +138,9 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
139138
// Execute mutators, if any apply
140139
err = a.mutate(ctx, obj)
141140
if err != nil {
142-
if klog.V(5).Enabled() {
143-
klog.Errorf("error mutating: %v", err)
141+
if klog.V(4).Enabled() {
142+
// only log event emitted errors if the verbosity > 4
143+
klog.Errorf("apply mutation errored (object: %s): %v", id, err)
144144
}
145145
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
146146
taskContext.InventoryManager().AddFailedApply(id)
@@ -152,7 +152,7 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
152152
ao := applyOptionsFactoryFunc(a.Name(), taskContext.EventChannel(),
153153
a.ServerSideOptions, a.DryRunStrategy, a.DynamicClient, a.OpenAPIGetter)
154154
ao.SetObjects([]*resource.Info{info})
155-
klog.V(5).Infof("applying %s/%s...", info.Namespace, info.Name)
155+
klog.V(5).Infof("applying object: %v", id)
156156
err = ao.Run()
157157
if err != nil && a.ServerSideOptions.ServerSideApply && isAPIService(obj) && isStreamError(err) {
158158
// Server-side Apply doesn't work with APIService before k8s 1.21
@@ -161,13 +161,12 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
161161
err = a.clientSideApply(info, taskContext.EventChannel())
162162
}
163163
if err != nil {
164+
err = applyerror.NewApplyRunError(err)
164165
if klog.V(4).Enabled() {
165-
klog.Errorf("error applying (%s/%s) %s", info.Namespace, info.Name, err)
166+
// only log event emitted errors if the verbosity > 4
167+
klog.Errorf("apply errored (object: %s): %v", id, err)
166168
}
167-
taskContext.SendEvent(a.createApplyFailedEvent(
168-
id,
169-
applyerror.NewApplyRunError(err),
170-
))
169+
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
171170
taskContext.InventoryManager().AddFailedApply(id)
172171
} else if info.Object != nil {
173172
acc, err := meta.Accessor(info.Object)

0 commit comments

Comments
 (0)