Skip to content

Commit c177692

Browse files
Adjust cleanup to avoid panics and getting stuck
In one situation, an unresolved hub template was getting stuck on a ConfigurationPolicy and prevented the finalizer from being removed. While testing that case, it was found that the reconciler's DynamicWatcher could be nil in some cases and would thus cause a panic. Both of these situations are adressed by adjusting the cleanup logic so that it is clear that the only actions performed while cleaning up are related to the required removal of the finalizers. Refs: - https://issues.redhat.com/browse/ACM-22713 Signed-off-by: Justin Kulikauskas <[email protected]>
1 parent b390ed7 commit c177692

File tree

4 files changed

+86
-126
lines changed

4 files changed

+86
-126
lines changed

controllers/configurationpolicy_controller.go

Lines changed: 63 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,19 @@ func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request c
214214
log := log.WithValues("name", request.Name, "namespace", request.Namespace)
215215
policy := &policyv1.ConfigurationPolicy{}
216216

217-
err := r.Get(ctx, request.NamespacedName, policy)
217+
cleanup, err := r.cleanupImmediately()
218+
if !cleanup && err != nil {
219+
log.Error(err, "Failed to determine if it's time to cleanup immediately")
220+
221+
return reconcile.Result{}, err
222+
}
223+
224+
err = r.Get(ctx, request.NamespacedName, policy)
218225
if k8serrors.IsNotFound(err) {
226+
if cleanup {
227+
return reconcile.Result{}, nil
228+
}
229+
219230
log.V(1).Info("Handling a deleted policy")
220231
removeConfigPolicyMetrics(request)
221232
r.SelectorReconciler.Stop(request.Namespace, request.Name)
@@ -247,39 +258,45 @@ func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request c
247258
nonCompliantWithWatch := policy.Status.ComplianceState != policyv1.Compliant &&
248259
policy.Spec.EvaluationInterval.IsWatchForNonCompliant()
249260

250-
if !(compliantWithWatch || nonCompliantWithWatch) {
261+
if !(compliantWithWatch || nonCompliantWithWatch) && !cleanup {
251262
err := r.DynamicWatcher.RemoveWatcher(policy.ObjectIdentifier())
252263
if err != nil {
253264
log.Error(err, "Failed to remove any watches related to this ConfigurationPolicy. Will ignore.")
254265
}
255266
}
256267
}()
257268

258-
uninstalling, crdDeleting, err := r.cleanupImmediately()
259-
if !uninstalling && !crdDeleting && err != nil {
260-
log.Error(err, "Failed to determine if it's time to cleanup immediately")
261-
262-
return reconcile.Result{}, err
263-
}
264-
265269
// If the ConfigurationPolicy's spec field was updated, clear the cache of evaluated objects.
266270
if policy.Status.LastEvaluatedGeneration != policy.Generation {
267271
r.processedPolicyCache.Delete(policy.GetUID())
268272
}
269273

270-
if err := r.resolveHubTemplates(ctx, policy); err != nil {
271-
statusChanged := addConditionToStatus(policy, -1, false, "Hub template resolution failure", err.Error())
274+
// When *not* cleaning up, hub templates could change the `pruneObjectBehavior` setting, which
275+
// affects how the deletion finalizer is managed, so this must be done first.
276+
// But when `cleanup` is true, we must skip resolving the hub templates.
277+
if !cleanup {
278+
if err := r.resolveHubTemplates(ctx, policy); err != nil {
279+
statusChanged := addConditionToStatus(policy, -1, false, "Hub template resolution failure", err.Error())
272280

273-
if statusChanged {
274-
r.recordInfoEvent(policy, true)
275-
}
281+
if statusChanged {
282+
r.recordInfoEvent(policy, true)
283+
}
276284

277-
r.addForUpdate(policy, statusChanged)
285+
r.addForUpdate(policy, statusChanged)
278286

287+
return reconcile.Result{}, err
288+
}
289+
}
290+
291+
if err := r.manageDeletionFinalizer(policy, cleanup); err != nil {
279292
return reconcile.Result{}, err
280293
}
281294

282-
shouldEvaluate, durationLeft := r.shouldEvaluatePolicy(policy, (uninstalling || crdDeleting))
295+
if cleanup {
296+
return reconcile.Result{}, nil
297+
}
298+
299+
shouldEvaluate, durationLeft := r.shouldEvaluatePolicy(policy)
283300
if !shouldEvaluate {
284301
// Requeue based on the remaining time for the evaluation interval to be met.
285302
return reconcile.Result{RequeueAfter: durationLeft}, nil
@@ -393,16 +410,10 @@ func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request c
393410
// cleanupImmediately should be set true when the controller is getting uninstalled.
394411
// If the policy is not ready to be evaluated, the returned duration is how long until the next evaluation.
395412
func (r *ConfigurationPolicyReconciler) shouldEvaluatePolicy(
396-
policy *policyv1.ConfigurationPolicy, cleanupImmediately bool,
413+
policy *policyv1.ConfigurationPolicy,
397414
) (bool, time.Duration) {
398415
log := log.WithValues("policy", policy.GetName())
399416

400-
// If it's time to clean up such as when the config-policy-controller is being uninstalled, only evaluate policies
401-
// with a finalizer to remove the finalizer.
402-
if cleanupImmediately {
403-
return len(policy.Finalizers) != 0, 0
404-
}
405-
406417
if cachedLastEval, ok := r.lastEvaluatedCache.Load(policy.UID); ok {
407418
last, cachedConversionErr := strconv.Atoi(cachedLastEval.(string))
408419
current, realConversionErr := strconv.Atoi(policy.GetResourceVersion())
@@ -706,17 +717,12 @@ func (r *ConfigurationPolicyReconciler) cleanUpChildObjects(
706717
return deletionFailures
707718
}
708719

709-
// cleanupImmediately returns true (i.e. beingUninstalled or crdDeleting) when the cluster is in a state where
710-
// configurationpolicies should be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This
720+
// cleanupImmediately returns true when the cluster is in a state where configurationpolicies
721+
// should be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This
711722
// is the case when the controller is being uninstalled or the CRD is being deleted.
712-
func (r *ConfigurationPolicyReconciler) cleanupImmediately() (beingUninstalled bool, crdDeleting bool, err error) {
713-
var beingUninstalledErr error
714-
715-
beingUninstalled, beingUninstalledErr = IsBeingUninstalled(r.Client)
716-
717-
var defErr error
718-
719-
crdDeleting, defErr = r.definitionIsDeleting()
723+
func (r *ConfigurationPolicyReconciler) cleanupImmediately() (cleanup bool, err error) {
724+
beingUninstalled, beingUninstalledErr := IsBeingUninstalled(r.Client)
725+
crdDeleting, defErr := r.definitionIsDeleting()
720726

721727
switch {
722728
case beingUninstalledErr != nil && defErr != nil:
@@ -727,7 +733,7 @@ func (r *ConfigurationPolicyReconciler) cleanupImmediately() (beingUninstalled b
727733
err = defErr
728734
}
729735

730-
return
736+
return (beingUninstalled || crdDeleting), err
731737
}
732738

733739
func (r *ConfigurationPolicyReconciler) definitionIsDeleting() (bool, error) {
@@ -921,10 +927,6 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc *policyv1.Conf
921927
return err
922928
}
923929

924-
if returnNow, err := r.manageDeletionFinalizer(plc); err != nil || returnNow {
925-
return err
926-
}
927-
928930
usingWatch := currentlyUsingWatch(plc)
929931

930932
if usingWatch && r.DynamicWatcher != nil {
@@ -1172,52 +1174,39 @@ func (r *ConfigurationPolicyReconciler) validateConfigPolicy(plc *policyv1.Confi
11721174
}
11731175

11741176
// manageDeletionFinalizer sets or removes the finalizer on the ConfigurationPolicy based on the
1175-
// pruneObjectBehavior setting, and whether the controller is being uninstalled. If the controller
1176-
// is being uninstalled, the function will return true, indicating the rest of the policy handling
1177-
// logic should be skipped.
1178-
func (r *ConfigurationPolicyReconciler) manageDeletionFinalizer(plc *policyv1.ConfigurationPolicy,
1179-
) (returnNow bool, err error) {
1180-
if plc.Spec.PruneObjectBehavior == "DeleteIfCreated" || plc.Spec.PruneObjectBehavior == "DeleteAll" {
1181-
uninstalling, crdDeleting, err := r.cleanupImmediately()
1182-
if !uninstalling && !crdDeleting && err != nil {
1183-
log.Error(err, "Error determining whether to cleanup immediately, requeueing policy")
1184-
1185-
return true, err
1186-
}
1187-
1188-
if uninstalling || crdDeleting {
1189-
if objHasFinalizer(plc, pruneObjectFinalizer) {
1190-
patch := removeObjFinalizerPatch(plc, pruneObjectFinalizer)
1191-
1192-
err = r.Patch(context.TODO(), plc, client.RawPatch(types.JSONPatchType, patch))
1193-
if err != nil {
1194-
log.Error(err, "Error removing finalizer for configuration policy")
1177+
// pruneObjectBehavior setting and current `cleanup` state.
1178+
func (r *ConfigurationPolicyReconciler) manageDeletionFinalizer(
1179+
plc *policyv1.ConfigurationPolicy, cleanup bool,
1180+
) (err error) {
1181+
if cleanup {
1182+
if objHasFinalizer(plc, pruneObjectFinalizer) {
1183+
patch := removeObjFinalizerPatch(plc, pruneObjectFinalizer)
1184+
1185+
err = r.Patch(context.TODO(), plc, client.RawPatch(types.JSONPatchType, patch))
1186+
if err != nil {
1187+
log.Error(err, "Error removing finalizer for configuration policy")
11951188

1196-
return true, err
1197-
}
1189+
return err
11981190
}
1199-
1200-
return true, nil
12011191
}
12021192

1193+
return nil
1194+
}
1195+
1196+
if plc.Spec.PruneObjectBehavior == "DeleteIfCreated" || plc.Spec.PruneObjectBehavior == "DeleteAll" {
12031197
// set finalizer if it hasn't been set
12041198
if !objHasFinalizer(plc, pruneObjectFinalizer) {
1205-
var patch []byte
1199+
patch := `[{"op":"add","path":"/metadata/finalizers/-","value":"` + pruneObjectFinalizer + `"}]`
1200+
12061201
if plc.Finalizers == nil {
1207-
patch = []byte(
1208-
`[{"op":"add","path":"/metadata/finalizers","value":["` + pruneObjectFinalizer + `"]}]`,
1209-
)
1210-
} else {
1211-
patch = []byte(
1212-
`[{"op":"add","path":"/metadata/finalizers/-","value":"` + pruneObjectFinalizer + `"}]`,
1213-
)
1202+
patch = `[{"op":"add","path":"/metadata/finalizers","value":["` + pruneObjectFinalizer + `"]}]`
12141203
}
12151204

1216-
err := r.Patch(context.TODO(), plc, client.RawPatch(types.JSONPatchType, patch))
1205+
err := r.Patch(context.TODO(), plc, client.RawPatch(types.JSONPatchType, []byte(patch)))
12171206
if err != nil {
12181207
log.Error(err, "Error setting finalizer for configuration policy")
12191208

1220-
return true, err
1209+
return err
12211210
}
12221211
}
12231212
} else if objHasFinalizer(plc, pruneObjectFinalizer) {
@@ -1228,11 +1217,11 @@ func (r *ConfigurationPolicyReconciler) manageDeletionFinalizer(plc *policyv1.Co
12281217
if err != nil {
12291218
log.Error(err, "Error removing finalizer for configuration policy")
12301219

1231-
return true, err
1220+
return err
12321221
}
12331222
}
12341223

1235-
return false, nil
1224+
return nil
12361225
}
12371226

12381227
// handleDeletion cleans up the child objects, based on the pruneObjectBehavior setting. If all of

0 commit comments

Comments
 (0)