-
Notifications
You must be signed in to change notification settings - Fork 53
NO-JIRA: Force webhook failure policy to Ignore until cluster has bootstrapped #476
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -20,11 +20,14 @@ import ( | |||||
| "io/fs" | ||||||
| "path/filepath" | ||||||
|
|
||||||
| admissionregistrationv1 "k8s.io/api/admissionregistration/v1" | ||||||
| v1 "k8s.io/api/core/v1" | ||||||
| apierrors "k8s.io/apimachinery/pkg/api/errors" | ||||||
| "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||||||
| k8serrors "k8s.io/apimachinery/pkg/util/errors" | ||||||
| "k8s.io/client-go/kubernetes" | ||||||
| "k8s.io/utils/clock" | ||||||
| "k8s.io/utils/ptr" | ||||||
| ctrl "sigs.k8s.io/controller-runtime" | ||||||
| "sigs.k8s.io/controller-runtime/pkg/builder" | ||||||
| "sigs.k8s.io/controller-runtime/pkg/client" | ||||||
|
|
@@ -39,6 +42,7 @@ import ( | |||||
| "github.com/openshift/cluster-capi-operator/pkg/util" | ||||||
| "github.com/openshift/library-go/pkg/operator/events" | ||||||
| "github.com/openshift/library-go/pkg/operator/resource/resourceapply" | ||||||
| "github.com/openshift/library-go/pkg/operator/resource/resourceread" | ||||||
| ) | ||||||
|
|
||||||
| // Assets is an interface that can be used to read assets from a filesystem. | ||||||
|
|
@@ -48,8 +52,10 @@ type Assets interface { | |||||
| } | ||||||
|
|
||||||
| type staticResourceInstallerController struct { | ||||||
| assetNames []string // The names of the assets to install. | ||||||
| kubeClient kubernetes.Interface | ||||||
| assetNames []string // The names of the assets to install. | ||||||
| client client.Client | ||||||
| kubeClient kubernetes.Interface | ||||||
| initialClusterOperatorsBootstrapped bool | ||||||
|
|
||||||
| assets Assets | ||||||
| resourceCache resourceapply.ResourceCache | ||||||
|
|
@@ -65,6 +71,8 @@ func NewStaticResourceInstallerController(assets Assets) *staticResourceInstalle | |||||
|
|
||||||
| // SetupWithManager sets up the static resource installer controller with the given manager. | ||||||
| func (c *staticResourceInstallerController) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { | ||||||
| c.client = mgr.GetClient() | ||||||
|
|
||||||
| // The assets are an embedded filesystem and won't change over time. | ||||||
| assets, err := c.assets.ReadAssets() | ||||||
| if err != nil { | ||||||
|
|
@@ -82,11 +90,11 @@ func (c *staticResourceInstallerController) SetupWithManager(ctx context.Context | |||||
|
|
||||||
| build := ctrl.NewControllerManagedBy(mgr). | ||||||
| Named("static-resource-installer"). | ||||||
| // We only want to reconcile an initial time when the cluster operator is created | ||||||
| // in the cache, later reconciles will happen based on watches for individual assets. | ||||||
| // We only want to reconcile updates until we have observed that the cluster operators are all bootstrapped. | ||||||
| // This allows us to inject FailurePolicy: Ignore for webhooks during cluster bootstrap. | ||||||
| For(&configv1.ClusterOperator{}, builder.WithPredicates(predicate.Funcs{ | ||||||
| CreateFunc: func(e event.CreateEvent) bool { return e.Object.GetName() == controllers.ClusterOperatorName }, | ||||||
| UpdateFunc: func(e event.UpdateEvent) bool { return false }, | ||||||
| UpdateFunc: func(e event.UpdateEvent) bool { return !c.initialClusterOperatorsBootstrapped }, | ||||||
| DeleteFunc: func(e event.DeleteEvent) bool { return false }, | ||||||
| GenericFunc: func(e event.GenericEvent) bool { return false }, | ||||||
| })) | ||||||
|
|
@@ -124,7 +132,7 @@ func (c *staticResourceInstallerController) Reconcile(ctx context.Context, req c | |||||
| Name: "cluster-api", | ||||||
| }, clock.RealClock{}), | ||||||
| c.resourceCache, | ||||||
| c.assets.Asset, | ||||||
| c.mutateAsset(ctx), | ||||||
| c.assetNames..., | ||||||
| ) | ||||||
|
|
||||||
|
|
@@ -162,3 +170,218 @@ func objectNamePredicate(name string) predicate.Predicate { | |||||
| return obj.GetName() == name | ||||||
| }) | ||||||
| } | ||||||
|
|
||||||
| func (c *staticResourceInstallerController) mutateAsset(ctx context.Context) func(string) ([]byte, error) { | ||||||
| return func(name string) ([]byte, error) { | ||||||
| raw, err := c.assets.Asset(name) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to read asset %s: %w", name, err) | ||||||
| } | ||||||
|
|
||||||
| requiredObj, err := resourceread.ReadGenericWithUnstructured(raw) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to decode asset %s: %w", name, err) | ||||||
| } | ||||||
|
|
||||||
| switch t := requiredObj.(type) { | ||||||
| case *admissionregistrationv1.ValidatingWebhookConfiguration: | ||||||
| return c.mutateValidatingWebhookConfiguration(ctx, raw, t) | ||||||
| case *admissionregistrationv1.MutatingWebhookConfiguration: | ||||||
| return c.mutateMutatingWebhookConfiguration(ctx, raw, t) | ||||||
| } | ||||||
|
|
||||||
| return raw, nil | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| type webhookPolicy struct { | ||||||
| Name string | ||||||
| FailurePolicy *admissionregistrationv1.FailurePolicyType | ||||||
| } | ||||||
|
|
||||||
| func (c *staticResourceInstallerController) mutateValidatingWebhookConfiguration(ctx context.Context, raw []byte, obj *admissionregistrationv1.ValidatingWebhookConfiguration) ([]byte, error) { | ||||||
| currentObj := &admissionregistrationv1.ValidatingWebhookConfiguration{} | ||||||
|
|
||||||
| if err := c.client.Get(ctx, client.ObjectKey{Name: obj.Name}, currentObj); err != nil && !apierrors.IsNotFound(err) { | ||||||
| return nil, fmt.Errorf("failed to get validating webhook configuration %s: %w", obj.Name, err) | ||||||
| } else if err != nil && apierrors.IsNotFound(err) { | ||||||
| // If the object doesn't currently exist, apply it initially with the failure policy set to ignore | ||||||
| // so that we don't block cluster operators during cluster bootstrap. | ||||||
| for i := range obj.Webhooks { | ||||||
| obj.Webhooks[i].FailurePolicy = ptr.To(admissionregistrationv1.Ignore) | ||||||
| } | ||||||
|
|
||||||
| data, err := k8syaml.Marshal(obj) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to marshal object: %w", err) | ||||||
| } | ||||||
|
|
||||||
| return data, nil | ||||||
| } | ||||||
|
Comment on lines
+205
to
+220
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Avoid forcing Line 207 and Line 261 return early with Suggested fix (gate NotFound path on bootstrap state)- } else if err != nil && apierrors.IsNotFound(err) {
+ } else if err != nil && apierrors.IsNotFound(err) {
+ bootstrapped, berr := c.clusterOperatorsBootstrapped(ctx)
+ if berr != nil {
+ return nil, fmt.Errorf("failed to check if cluster operators are bootstrapped: %w", berr)
+ }
+ if bootstrapped {
+ return raw, nil
+ }
// If the object doesn't currently exist, apply it initially with the failure policy set to ignore
// so that we don't block cluster operators during cluster bootstrap.
for i := range obj.Webhooks {
obj.Webhooks[i].FailurePolicy = ptr.To(admissionregistrationv1.Ignore)
}Also applies to: 259-274 🤖 Prompt for AI Agents |
||||||
|
|
||||||
| clusterBootstrapped, err := c.clusterBootstrapped(ctx, | ||||||
| util.SliceMap(obj.Webhooks, func(webhook admissionregistrationv1.ValidatingWebhook) webhookPolicy { | ||||||
| return webhookPolicy{ | ||||||
| Name: webhook.Name, | ||||||
| FailurePolicy: webhook.FailurePolicy, | ||||||
| } | ||||||
| }), | ||||||
| util.SliceMap(currentObj.Webhooks, func(webhook admissionregistrationv1.ValidatingWebhook) webhookPolicy { | ||||||
| return webhookPolicy{ | ||||||
| Name: webhook.Name, | ||||||
| FailurePolicy: webhook.FailurePolicy, | ||||||
| } | ||||||
| })) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to check if cluster is bootstrapped: %w", err) | ||||||
| } | ||||||
|
|
||||||
| if clusterBootstrapped { | ||||||
| return raw, nil | ||||||
| } | ||||||
|
|
||||||
| // Cluster isn't yet bootstrapped, force all webhooks to ignore failures so that we don't block cluster operators during cluster bootstrap. | ||||||
| for i := range obj.Webhooks { | ||||||
| obj.Webhooks[i].FailurePolicy = ptr.To(admissionregistrationv1.Ignore) | ||||||
| } | ||||||
|
|
||||||
| data, err := k8syaml.Marshal(obj) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to marshal object: %w", err) | ||||||
| } | ||||||
|
|
||||||
| return data, nil | ||||||
| } | ||||||
|
|
||||||
| func (c *staticResourceInstallerController) mutateMutatingWebhookConfiguration(ctx context.Context, raw []byte, obj *admissionregistrationv1.MutatingWebhookConfiguration) ([]byte, error) { | ||||||
| currentObj := &admissionregistrationv1.MutatingWebhookConfiguration{} | ||||||
|
|
||||||
| if err := c.client.Get(ctx, client.ObjectKey{Name: obj.Name}, currentObj); err != nil && !apierrors.IsNotFound(err) { | ||||||
| return nil, fmt.Errorf("failed to get validating webhook configuration %s: %w", obj.Name, err) | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix incorrect resource type in error message. Line 260 is in Suggested fix- return nil, fmt.Errorf("failed to get validating webhook configuration %s: %w", obj.Name, err)
+ return nil, fmt.Errorf("failed to get mutating webhook configuration %s: %w", obj.Name, err)📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||
| } else if err != nil && apierrors.IsNotFound(err) { | ||||||
| // If the object doesn't currently exist, apply it initially with the failure policy set to ignore | ||||||
| // so that we don't block cluster operators during cluster bootstrap. | ||||||
| for i := range obj.Webhooks { | ||||||
| obj.Webhooks[i].FailurePolicy = ptr.To(admissionregistrationv1.Ignore) | ||||||
| } | ||||||
|
|
||||||
| data, err := k8syaml.Marshal(obj) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to marshal object: %w", err) | ||||||
| } | ||||||
|
|
||||||
| return data, nil | ||||||
| } | ||||||
|
|
||||||
| clusterBootstrapped, err := c.clusterBootstrapped(ctx, | ||||||
| util.SliceMap(obj.Webhooks, func(webhook admissionregistrationv1.MutatingWebhook) webhookPolicy { | ||||||
| return webhookPolicy{ | ||||||
| Name: webhook.Name, | ||||||
| FailurePolicy: webhook.FailurePolicy, | ||||||
| } | ||||||
| }), | ||||||
| util.SliceMap(currentObj.Webhooks, func(webhook admissionregistrationv1.MutatingWebhook) webhookPolicy { | ||||||
| return webhookPolicy{ | ||||||
| Name: webhook.Name, | ||||||
| FailurePolicy: webhook.FailurePolicy, | ||||||
| } | ||||||
| }), | ||||||
| ) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to check if cluster is bootstrapped: %w", err) | ||||||
| } | ||||||
|
|
||||||
| if clusterBootstrapped { | ||||||
| return raw, nil | ||||||
| } | ||||||
|
|
||||||
| // Cluster isn't yet bootstrapped, force all webhooks to ignore failures so that we don't block cluster operators during cluster bootstrap. | ||||||
| for i := range obj.Webhooks { | ||||||
| obj.Webhooks[i].FailurePolicy = ptr.To(admissionregistrationv1.Ignore) | ||||||
| } | ||||||
|
|
||||||
| data, err := k8syaml.Marshal(obj) | ||||||
| if err != nil { | ||||||
| return nil, fmt.Errorf("failed to marshal object: %w", err) | ||||||
| } | ||||||
|
|
||||||
| return data, nil | ||||||
| } | ||||||
|
|
||||||
| func (c *staticResourceInstallerController) clusterBootstrapped(ctx context.Context, webhooks, currentWebhooks []webhookPolicy) (bool, error) { | ||||||
| // First check if the existing policies match the desired policies. | ||||||
| // This means we already applied the manifest as it comes from the assets directly | ||||||
| // without any mutation. To do that, the cluster must already have been bootstrapped. | ||||||
| policiesMatch := true | ||||||
|
|
||||||
| for _, webhook := range webhooks { | ||||||
| for _, currentWebhook := range currentWebhooks { | ||||||
| if webhook.Name == currentWebhook.Name { | ||||||
| policiesMatch = policiesMatch && webhook.FailurePolicy != nil && currentWebhook.FailurePolicy != nil && *webhook.FailurePolicy == *currentWebhook.FailurePolicy | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| if policiesMatch { | ||||||
| return true, nil | ||||||
| } | ||||||
|
Comment on lines
+311
to
+327
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
At Line 317, the nested comparison only updates Suggested fix (compare by name with existence checks) func (c *staticResourceInstallerController) clusterBootstrapped(ctx context.Context, webhooks, currentWebhooks []webhookPolicy) (bool, error) {
- policiesMatch := true
-
- for _, webhook := range webhooks {
- for _, currentWebhook := range currentWebhooks {
- if webhook.Name == currentWebhook.Name {
- policiesMatch = policiesMatch && webhook.FailurePolicy != nil && currentWebhook.FailurePolicy != nil && *webhook.FailurePolicy == *currentWebhook.FailurePolicy
- }
- }
- }
+ if len(webhooks) != len(currentWebhooks) {
+ return c.clusterOperatorsBootstrapped(ctx)
+ }
+
+ currentByName := map[string]webhookPolicy{}
+ for _, currentWebhook := range currentWebhooks {
+ currentByName[currentWebhook.Name] = currentWebhook
+ }
+
+ policiesMatch := true
+ for _, webhook := range webhooks {
+ currentWebhook, ok := currentByName[webhook.Name]
+ if !ok {
+ policiesMatch = false
+ break
+ }
+ policiesMatch = policiesMatch &&
+ webhook.FailurePolicy != nil &&
+ currentWebhook.FailurePolicy != nil &&
+ *webhook.FailurePolicy == *currentWebhook.FailurePolicy
+ }
if policiesMatch {
return true, nil
}🤖 Prompt for AI Agents |
||||||
|
|
||||||
| return c.clusterOperatorsBootstrapped(ctx) | ||||||
| } | ||||||
|
|
||||||
| func (c *staticResourceInstallerController) clusterOperatorsBootstrapped(ctx context.Context) (bool, error) { | ||||||
| if c.initialClusterOperatorsBootstrapped { | ||||||
| // We have previously seen all cluster operators bootstrapped since we started the controller. | ||||||
| return true, nil | ||||||
| } | ||||||
|
|
||||||
| // Check all cluster operators and wait for them all to be bootstrapped. | ||||||
| // Once they are bootstrapped, we can apply the manifest directly | ||||||
| // as it is within the assets folder. | ||||||
| clusterOperators := &configv1.ClusterOperatorList{} | ||||||
| if err := c.client.List(ctx, clusterOperators); err != nil { | ||||||
| return false, fmt.Errorf("failed to list cluster operators: %w", err) | ||||||
| } | ||||||
|
|
||||||
| for _, clusterOperator := range clusterOperators.Items { | ||||||
| if !clusterOperatorBootstrapped(clusterOperator) { | ||||||
| return false, nil | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| c.initialClusterOperatorsBootstrapped = true | ||||||
|
|
||||||
| return true, nil | ||||||
| } | ||||||
|
|
||||||
| func clusterOperatorBootstrapped(clusterOperator configv1.ClusterOperator) bool { | ||||||
| conditions := clusterOperator.Status.Conditions | ||||||
|
|
||||||
| available, ok := clusterOperatorCondition(conditions, configv1.OperatorAvailable) | ||||||
| if !ok { | ||||||
| return false | ||||||
| } | ||||||
|
|
||||||
| progressing, ok := clusterOperatorCondition(conditions, configv1.OperatorProgressing) | ||||||
| if !ok { | ||||||
| return false | ||||||
| } | ||||||
|
|
||||||
| degraded, ok := clusterOperatorCondition(conditions, configv1.OperatorDegraded) | ||||||
| if !ok { | ||||||
| return false | ||||||
| } | ||||||
|
|
||||||
| return available.Status == configv1.ConditionTrue && | ||||||
| progressing.Status == configv1.ConditionFalse && | ||||||
| degraded.Status == configv1.ConditionFalse | ||||||
| } | ||||||
|
|
||||||
| func clusterOperatorCondition(conditions []configv1.ClusterOperatorStatusCondition, conditionType configv1.ClusterStatusConditionType) (configv1.ClusterOperatorStatusCondition, bool) { | ||||||
| for _, condition := range conditions { | ||||||
| if condition.Type == conditionType { | ||||||
| return condition, true | ||||||
| } | ||||||
| } | ||||||
| return configv1.ClusterOperatorStatusCondition{}, false | ||||||
| } | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: openshift/cluster-capi-operator
Length of output: 1001
🏁 Script executed:
# Read the relevant sections of the controller file wc -l pkg/controllers/staticresourceinstaller/controller.goRepository: openshift/cluster-capi-operator
Length of output: 132
🏁 Script executed:
Repository: openshift/cluster-capi-operator
Length of output: 814
🏁 Script executed:
Repository: openshift/cluster-capi-operator
Length of output: 1110
🏁 Script executed:
Repository: openshift/cluster-capi-operator
Length of output: 1145
🏁 Script executed:
Repository: openshift/cluster-capi-operator
Length of output: 884
🏁 Script executed:
Repository: openshift/cluster-capi-operator
Length of output: 926
Protect
initialClusterOperatorsBootstrappedwith synchronization to prevent data race.Line 97 reads this flag from predicate callbacks that run in the watch event loop, while lines 333 and 352 read and write it during reconciliation in the worker pool. These concurrent execution contexts create an unsynchronized access to a shared boolean field, which is a data race. Use
atomic.Boolto synchronize access.Additionally, the error message at line 260 incorrectly refers to "validating webhook configuration" when the function mutates a
MutatingWebhookConfiguration. Update the error text to say "mutating webhook configuration".🤖 Prompt for AI Agents