
Commit 6205a8a

Skarlso, matthiasbruns, and jakobmoellerdev authored
fix: error checking and prune bool check upon HasPruned and removed some dead code (open-component-model#1897)
#### What this PR does / why we need it

When all objects were deleted at the same time, there was an error check to determine whether a specific error was a deletion error. This was checked with `errors.Is`. However, `errors.Is` compares error values, so it also takes the wrapped message into account, and here the comparison target was an empty error value. Another option would be to implement `Is` on the error type, but that is effectively just reimplementing `As`. What we actually want is to check that whatever comes back is of the error's *type*, not that it *is* that exact error value.

Further, the `HasPruned` check was inverted: we want to act when pruning actually *has* happened, not when it hasn't.

Lastly, some dead code was removed and the guard

```go
if component.Status.Component.RepositorySpec == nil {
```

was added, which fixes a masked nil panic that occurred further below.

#### Which issue(s) this PR fixes

#### Testing

##### How to test the changes

Required files to test the changes: `.ocmconfig`

```yaml
type: generic.config.ocm.software/v1
configurations:
  - type: credentials.config.ocm.software
    repositories:
      - repository:
          type: DockerConfig/v1
          dockerConfigFile: "~/.docker/config.json"
```

Commands that test the change:

```bash
ocm get cv xxx
ocm transfer xxx
```

##### Verification

- [ ] I have tested the changes locally by running `ocm`

## Summary by CodeRabbit

* **Bug Fixes**
  * Improved detection and messaging for unavailable resources.
  * Added nil-safety checks to prevent failures when resource specifications are missing.
* **Improvements**
  * Better reconciliation progress signaling to reduce unnecessary requeues during cleanup.
  * More precise error handling to distinguish transient from final unavailability, improving stability and observability.

---------

Signed-off-by: Gergely Brautigam <182850+Skarlso@users.noreply.github.com>
Co-authored-by: Matthias Bruns <github@matthiasbruns.com>
Co-authored-by: Jakob Möller <jakob.moeller@sap.com>
1 parent 8f39ca2 commit 6205a8a
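The `errors.Is` vs. `errors.As` point from the description can be illustrated with a small, self-contained sketch. The `DeletionError` type below is only a stand-in for the controller's `util.DeletionError`; its `Err` field and `Error()` method are assumptions made for this example, not the actual definition.

```go
package main

import (
	"errors"
	"fmt"
)

// DeletionError is a stand-in for util.DeletionError; the real type may differ.
type DeletionError struct {
	Err error
}

func (e DeletionError) Error() string {
	return fmt.Sprintf("deletion error: %v", e.Err)
}

func main() {
	// An error as a reconciler might see it: a DeletionError wrapped with context.
	err := fmt.Errorf("reconcile failed: %w", DeletionError{Err: errors.New("object is being deleted")})

	// errors.Is falls back to == for comparable error values, so matching against
	// the zero value DeletionError{} fails as soon as the error carries any state.
	fmt.Println(errors.Is(err, DeletionError{})) // false

	// errors.As matches on the concrete type in the chain, regardless of its fields,
	// which is the behaviour the controllers need here.
	var deletionErr DeletionError
	fmt.Println(errors.As(err, &deletionErr)) // true
}
```

Implementing an `Is` method on the error type would restore value-independent matching, but as the description notes, that is essentially reimplementing what `errors.As` already does.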

File tree

6 files changed (+461 −28 lines)


kubernetes/controller/internal/controller/component/component_controller.go

Lines changed: 4 additions & 3 deletions
@@ -209,10 +209,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 		// However, as the component is hard-dependant on the repository, we decided to mark it not ready as well.
 		status.MarkNotReady(r.EventRecorder, component, v1alpha1.GetResourceFailedReason, "OCM Repository is not ready")
 
-		if errors.Is(err, util.NotReadyError{}) || errors.Is(err, util.DeletionError{}) {
-			logger.Info(err.Error())
+		var notReadyErr util.NotReadyError
+		var deletionErr util.DeletionError
+		if errors.As(err, &notReadyErr) || errors.As(err, &deletionErr) {
+			logger.Info("repository is not available", "error", err)
 
-			// return no requeue as we watch the object for changes anyway
 			return ctrl.Result{}, nil
 		}
 
kubernetes/controller/internal/controller/component/component_controller_unit_test.go

Lines changed: 210 additions & 0 deletions
@@ -33,6 +33,216 @@ import (
 	"ocm.software/open-component-model/kubernetes/controller/internal/resolution/workerpool"
 )
 
+func newComponentReconciler(fakeClient client.Client, scheme *runtime.Scheme) *Reconciler {
+	return &Reconciler{
+		BaseReconciler: &ocm.BaseReconciler{
+			Client:        fakeClient,
+			Scheme:        scheme,
+			EventRecorder: &record.FakeRecorder{Events: make(chan string, 100)},
+		},
+	}
+}
+
+func TestReconcile_RepositoryNotReady_ReturnsNoRequeue(t *testing.T) {
+	g := NewWithT(t)
+	ctx := t.Context()
+
+	scheme := runtime.NewScheme()
+	g.Expect(corev1.AddToScheme(scheme)).To(Succeed())
+	g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed())
+
+	repo := &v1alpha1.Repository{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-repo",
+			Namespace: "default",
+		},
+		Spec: v1alpha1.RepositorySpec{
+			Interval: metav1.Duration{Duration: time.Minute},
+		},
+	}
+
+	component := &v1alpha1.Component{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:       "test-component",
+			Namespace:  "default",
+			Finalizers: []string{v1alpha1.ComponentFinalizer},
+		},
+		Spec: v1alpha1.ComponentSpec{
+			RepositoryRef: corev1.LocalObjectReference{Name: "test-repo"},
+			Component:     "ocm.software/test",
+			Semver:        "1.0.0",
+			Interval:      metav1.Duration{Duration: time.Minute},
+		},
+	}
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(repo, component).
+		WithStatusSubresource(&v1alpha1.Component{}, &v1alpha1.Repository{}).
+		Build()
+
+	result, err := newComponentReconciler(fakeClient, scheme).Reconcile(ctx, ctrl.Request{
+		NamespacedName: types.NamespacedName{Name: "test-component", Namespace: "default"},
+	})
+
+	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(result).To(Equal(ctrl.Result{}))
+}
+
+func TestReconcile_RepositoryBeingDeleted_ReturnsNoRequeue(t *testing.T) {
+	g := NewWithT(t)
+	ctx := t.Context()
+
+	scheme := runtime.NewScheme()
+	g.Expect(corev1.AddToScheme(scheme)).To(Succeed())
+	g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed())
+
+	now := metav1.Now()
+	repo := &v1alpha1.Repository{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:              "test-repo",
+			Namespace:         "default",
+			DeletionTimestamp: &now,
+			Finalizers:        []string{"prevent-deletion"},
+		},
+		Spec: v1alpha1.RepositorySpec{
+			Interval: metav1.Duration{Duration: time.Minute},
+		},
+	}
+	conditions.MarkTrue(repo, "Ready", "ready", "message")
+
+	component := &v1alpha1.Component{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:       "test-component",
+			Namespace:  "default",
+			Finalizers: []string{v1alpha1.ComponentFinalizer},
+		},
+		Spec: v1alpha1.ComponentSpec{
+			RepositoryRef: corev1.LocalObjectReference{Name: "test-repo"},
+			Component:     "ocm.software/test",
+			Semver:        "1.0.0",
+			Interval:      metav1.Duration{Duration: time.Minute},
+		},
+	}
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(repo, component).
+		WithStatusSubresource(&v1alpha1.Component{}, &v1alpha1.Repository{}).
+		Build()
+
+	result, err := newComponentReconciler(fakeClient, scheme).Reconcile(ctx, ctrl.Request{
+		NamespacedName: types.NamespacedName{Name: "test-component", Namespace: "default"},
+	})
+
+	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(result).To(Equal(ctrl.Result{}))
+}
+
+func TestReconcile_RepositoryNotFound_ReturnsError(t *testing.T) {
+	g := NewWithT(t)
+	ctx := t.Context()
+
+	scheme := runtime.NewScheme()
+	g.Expect(corev1.AddToScheme(scheme)).To(Succeed())
+	g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed())
+
+	component := &v1alpha1.Component{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:       "test-component",
+			Namespace:  "default",
+			Finalizers: []string{v1alpha1.ComponentFinalizer},
+		},
+		Spec: v1alpha1.ComponentSpec{
+			RepositoryRef: corev1.LocalObjectReference{Name: "nonexistent-repo"},
+			Component:     "ocm.software/test",
+			Semver:        "1.0.0",
+			Interval:      metav1.Duration{Duration: time.Minute},
+		},
+	}
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(component).
+		WithStatusSubresource(&v1alpha1.Component{}).
+		Build()
+
+	_, err := newComponentReconciler(fakeClient, scheme).Reconcile(ctx, ctrl.Request{
+		NamespacedName: types.NamespacedName{Name: "test-component", Namespace: "default"},
+	})
+
+	g.Expect(err).To(HaveOccurred())
+	g.Expect(err.Error()).To(ContainSubstring("failed to get ready repository"))
+}
+
+func TestReconcile_ComponentNotFound_ReturnsNoError(t *testing.T) {
+	g := NewWithT(t)
+	ctx := t.Context()
+
+	scheme := runtime.NewScheme()
+	g.Expect(corev1.AddToScheme(scheme)).To(Succeed())
+	g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed())
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		Build()
+
+	result, err := newComponentReconciler(fakeClient, scheme).Reconcile(ctx, ctrl.Request{
+		NamespacedName: types.NamespacedName{Name: "nonexistent", Namespace: "default"},
+	})
+
+	g.Expect(err).ToNot(HaveOccurred())
+	g.Expect(result).To(Equal(ctrl.Result{}))
+}
+
+func TestReconcile_RepositoryNotReady_ComponentMarkedNotReady(t *testing.T) {
+	g := NewWithT(t)
+	ctx := t.Context()
+
+	scheme := runtime.NewScheme()
+	g.Expect(corev1.AddToScheme(scheme)).To(Succeed())
+	g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed())
+
+	repo := &v1alpha1.Repository{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-repo",
+			Namespace: "default",
+		},
+		Spec: v1alpha1.RepositorySpec{
+			Interval: metav1.Duration{Duration: time.Minute},
+		},
+	}
+
+	component := &v1alpha1.Component{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:       "test-component",
+			Namespace:  "default",
+			Finalizers: []string{v1alpha1.ComponentFinalizer},
+		},
+		Spec: v1alpha1.ComponentSpec{
+			RepositoryRef: corev1.LocalObjectReference{Name: "test-repo"},
+			Component:     "ocm.software/test",
+			Semver:        "1.0.0",
+			Interval:      metav1.Duration{Duration: time.Minute},
+		},
+	}
+
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(repo, component).
+		WithStatusSubresource(&v1alpha1.Component{}, &v1alpha1.Repository{}).
+		Build()
+
+	_, err := newComponentReconciler(fakeClient, scheme).Reconcile(ctx, ctrl.Request{
+		NamespacedName: types.NamespacedName{Name: "test-component", Namespace: "default"},
+	})
+	g.Expect(err).ToNot(HaveOccurred())
+
+	updated := &v1alpha1.Component{}
+	g.Expect(fakeClient.Get(ctx, types.NamespacedName{Name: "test-component", Namespace: "default"}, updated)).To(Succeed())
+	g.Expect(conditions.IsReady(updated)).To(BeFalse())
+}
+
 // TestResolutionInProgress_UnitTest tests that the first reconciliation returns ResolutionInProgress
 // when a component version resolution is started. This test uses a fake client to avoid race conditions
 // with the fast event-driven reconciliation. This cannot be tested reliably over envtest because the

kubernetes/controller/internal/controller/deployer/deployer_controller.go

Lines changed: 43 additions & 23 deletions
@@ -8,6 +8,7 @@ import (
 	"io"
 	"runtime"
 	"slices"
+	"time"
 
 	"github.com/fluxcd/pkg/runtime/patch"
 	"github.com/go-logr/logr"
@@ -209,7 +210,10 @@ func (r *Reconciler) setupDynamicResourceWatcherWithManager(mgr ctrl.Manager) (*
 
 // Untrack removes the deployer from the tracked objects and stops the resource watch if it is still running.
 // It also removes the finalizer from the deployer if there are no more tracked objects.
-func (r *Reconciler) Untrack(ctx context.Context, deployer *deliveryv1alpha1.Deployer) error {
+// Untrack sends stop events for all active resource watches on the deployer.
+// Returns true if all watches are already stopped, false if stop events were
+// sent and another reconcile is needed to verify completion.
+func (r *Reconciler) Untrack(ctx context.Context, deployer *deliveryv1alpha1.Deployer) (bool, error) {
 	logger := log.FromContext(ctx)
 	var atLeastOneResourceNeededStopWatch bool
 	for _, obj := range r.resourceWatches(deployer) {
@@ -221,26 +225,29 @@ func (r *Reconciler) Untrack(ctx context.Context, deployer *deliveryv1alpha1.Dep
 				Child: obj,
 			}:
 			case <-ctx.Done():
-				return fmt.Errorf("context canceled while unregistering resource watch for deployer %s: %w", deployer.Name, ctx.Err())
+				return false, fmt.Errorf("context canceled while unregistering resource watch for deployer %s: %w", deployer.Name, ctx.Err())
 			}
 			atLeastOneResourceNeededStopWatch = true
 		}
 	}
 	if atLeastOneResourceNeededStopWatch {
-		return fmt.Errorf("waiting for at least one resource watch to be removed")
+		return false, nil
 	}
 
-	return nil
+	return true, nil
 }
 
-func (r *Reconciler) pruneWithApplySet(ctx context.Context, deployer *deliveryv1alpha1.Deployer) error {
+// pruneWithApplySet prunes all resources managed by the deployer's ApplySet.
+// Returns true if pruning is complete (nothing left to prune), false if resources
+// are still being pruned and another reconcile is needed.
+func (r *Reconciler) pruneWithApplySet(ctx context.Context, deployer *deliveryv1alpha1.Deployer) (bool, error) {
 	logger := log.FromContext(ctx).WithValues("deployer", deployer.Name, "namespace", deployer.Namespace)
 
 	set := r.createApplySet(deployer, logger)
 
 	metadata, err := set.Project(nil)
 	if err != nil {
-		return fmt.Errorf("failed to project ApplySet: %w", err)
+		return false, fmt.Errorf("failed to project ApplySet: %w", err)
 	}
 
 	logger.Info("pruning ApplySet", "scope", metadata.PruneScope())
@@ -250,21 +257,17 @@ func (r *Reconciler) pruneWithApplySet(ctx context.Context, deployer *deliveryv1
 		Concurrency: runtime.NumCPU(),
 	})
 	if err != nil {
-		return fmt.Errorf("failed to prune ApplySet: %w", err)
+		return false, fmt.Errorf("failed to prune ApplySet: %w", err)
 	}
 
-	// Log results
 	logger.Info("ApplySet prune operation complete", "pruned", len(result.Pruned))
 
-	// Prune calls delete on every resource found, even if its already being deleted.
-	// If we were to remove this check, the deployer might be deleted while a child is stuck in terminating state.
-	if !result.HasPruned() {
-		logger.Info("pruned resources, doing one more pruning until nothing more to prune")
-		return fmt.Errorf("waiting for all resources to be pruned")
+	if result.HasPruned() {
+		logger.Info("resources still being pruned, waiting for them to be fully removed")
+		return false, nil
 	}
 
-	// nothing more to prune, remove finalizer
-	return nil
+	return true, nil
 }
 
 func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, err error) {
@@ -302,10 +305,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 	if err != nil {
 		status.MarkNotReady(r.EventRecorder, deployer, deliveryv1alpha1.ResourceIsNotAvailable, err.Error())
 
-		if errors.Is(err, util.NotReadyError{}) || errors.Is(err, util.DeletionError{}) {
-			logger.Info("stop reconciling as the resource is not available", "error", err.Error())
+		var notReadyErr util.NotReadyError
+		var deletionErr util.DeletionError
+		if errors.As(err, &notReadyErr) || errors.As(err, &deletionErr) {
+			logger.Info("resource is not available", "error", err)
 
-			// return no requeue as we watch the object for changes anyway
 			return ctrl.Result{}, nil
 		}
 
@@ -363,20 +367,28 @@ func (r *Reconciler) reconcileDeletionTimestamp(ctx context.Context, deployer *d
 
 	if hasPruneSetFinalizer {
 		logger.Info("pruning ApplySet before removing finalizer")
-		if err := r.pruneWithApplySet(ctx, deployer); err != nil {
+		done, err := r.pruneWithApplySet(ctx, deployer)
+		switch {
+		case err != nil:
			logger.Error(err, "waiting for ApplySet to be pruned before removing finalizer")
			errs = append(errs, err)
-		} else {
+		case !done:
+			return ctrl.Result{RequeueAfter: time.Second}, nil, true
+		default:
			logger.Info("successfully pruned ApplySet for deployer")
			controllerutil.RemoveFinalizer(deployer, applySetPruneFinalizer)
 		}
 	} else if controllerutil.ContainsFinalizer(deployer, resourceWatchFinalizer) {
 		logger.Info("untracking resources before removing finalizer")
-		if err := r.Untrack(ctx, deployer); err != nil {
+		done, err := r.Untrack(ctx, deployer)
+		switch {
+		case err != nil:
			logger.Error(err, "waiting for tracked resources to be unregistered before pruning")
			errs = append(errs, err)
-		} else {
-			logger.Info("successfully unregistered all resource watches for deployer")
+		case !done:
+			return ctrl.Result{RequeueAfter: time.Second}, nil, true
+		default:
+			logger.Info("successfully untracked resources")
			controllerutil.RemoveFinalizer(deployer, resourceWatchFinalizer)
 		}
 	}
@@ -385,6 +397,14 @@ func (r *Reconciler) reconcileDeletionTimestamp(ctx context.Context, deployer *d
 		return ctrl.Result{}, fmt.Errorf("failed to cleanup deployer before deletion: %w", errors.Join(errs...)), true
 	}
 
+	// Requeue if there are still finalizers to process. Metadata-only changes
+	// (like finalizer removal) do not trigger the GenerationChangedPredicate,
+	// so an explicit requeue is needed.
+	if controllerutil.ContainsFinalizer(deployer, applySetPruneFinalizer) ||
+		controllerutil.ContainsFinalizer(deployer, resourceWatchFinalizer) {
+		return ctrl.Result{Requeue: true}, nil, true
+	}
+
 	logger.Info("successfully cleaned up deployer before deletion")
 	return ctrl.Result{}, nil, true
 }
