Skip to content

Commit 5080ea0

Browse files
dprince and Brendan Shephard committed
Force reinstall of operator resources on release version upgrade
Fixes upgrade failures from 0.4 to main caused by incompatible webhook configuration changes that trigger index-out-of-range panics during manifest merging.

When OPENSTACK_RELEASE_VERSION is bumped, the controller now:
- Detects the version change by comparing against status.ReleaseVersion
- Deletes all owned resources (deployments, services, serviceaccounts, configmaps)
- Removes managed webhooks (validating and mutating configurations)
- Requeues to recreate resources with new manifests

This one-time cleanup ensures a clean slate for incompatible upgrades where the structure of resources (especially webhooks) has changed between versions.

Adds ReleaseVersion field to OpenStackStatus to track the deployed version.

Jira: OSPRH-23865

Co-authored-by: Brendan Shephard <bshephar@fedora-g16.bne-home.net>
1 parent cb3fdc6 commit 5080ea0

File tree

5 files changed

+110
-1
lines changed

5 files changed

+110
-1
lines changed

api/bases/operator.openstack.org_openstacks.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ spec:
164164
observedGeneration:
165165
format: int64
166166
type: integer
167+
releaseVersion:
168+
type: string
167169
totalOperatorCount:
168170
type: integer
169171
type: object

api/operator/v1beta1/openstack_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ type OpenStackStatus struct {
256256

257257
// ContainerImage - the container image that has been successfully deployed
258258
ContainerImage *string `json:"containerImage,omitempty"`
259+
260+
// ReleaseVersion - the OpenStack release version that has been successfully deployed
261+
ReleaseVersion *string `json:"releaseVersion,omitempty"`
259262
}
260263

261264
// +kubebuilder:object:root=true

api/operator/v1beta1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/operator.openstack.org_openstacks.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ spec:
164164
observedGeneration:
165165
format: int64
166166
type: integer
167+
releaseVersion:
168+
type: string
167169
totalOperatorCount:
168170
type: integer
169171
type: object

internal/controller/operator/openstack_controller.go

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"github.com/openstack-k8s-operators/openstack-operator/internal/operator"
4444
"github.com/openstack-k8s-operators/openstack-operator/internal/operator/bindata"
4545
"github.com/pkg/errors"
46+
admissionv1 "k8s.io/api/admissionregistration/v1"
4647
appsv1 "k8s.io/api/apps/v1"
4748
corev1 "k8s.io/api/core/v1"
4849
discoveryv1 "k8s.io/api/discovery/v1"
@@ -250,6 +251,39 @@ func (r *OpenStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
250251
return ctrl.Result{}, err
251252
}
252253

254+
// Check if OPENSTACK_RELEASE_VERSION has changed - if so, delete all owned resources
255+
// This is a one-time fix to handle incompatible upgrades
256+
if instance.Status.ReleaseVersion != nil && *instance.Status.ReleaseVersion != openstackReleaseVersion {
257+
Log.Info("OpenStack release version changed, deleting all owned resources",
258+
"old", *instance.Status.ReleaseVersion,
259+
"new", openstackReleaseVersion)
260+
261+
if err := r.deleteAllOwnedResources(ctx, instance); err != nil {
262+
instance.Status.Conditions.Set(condition.FalseCondition(
263+
operatorv1beta1.OpenStackOperatorReadyCondition,
264+
condition.ErrorReason,
265+
condition.SeverityWarning,
266+
operatorv1beta1.OpenStackOperatorErrorMessage,
267+
err))
268+
return ctrl.Result{}, err
269+
}
270+
271+
// Reset the container image status to force re-application of CRDs and RBAC
272+
instance.Status.ContainerImage = nil
273+
274+
// Update the release version in status
275+
instance.Status.ReleaseVersion = &openstackReleaseVersion
276+
277+
// Requeue to allow resources to be deleted before recreating
278+
Log.Info("Resources deleted, requeuing to recreate with new version")
279+
return ctrl.Result{RequeueAfter: time.Duration(5) * time.Second}, nil
280+
}
281+
282+
// Set the release version if not set
283+
if instance.Status.ReleaseVersion == nil {
284+
instance.Status.ReleaseVersion = &openstackReleaseVersion
285+
}
286+
253287
if err := r.applyManifests(ctx, instance); err != nil {
254288
instance.Status.Conditions.Set(condition.FalseCondition(
255289
operatorv1beta1.OpenStackOperatorReadyCondition,
@@ -316,6 +350,69 @@ func (r *OpenStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
316350

317351
}
318352

353+
func deleteOwnedResources[L any, T any](
354+
ctx context.Context,
355+
r *OpenStackReconciler,
356+
instance client.Object,
357+
list L,
358+
itemsGetter func(L) []T,
359+
) error {
360+
log := r.GetLogger(ctx)
361+
362+
err := r.List(ctx, any(list).(client.ObjectList), &client.ListOptions{Namespace: instance.GetNamespace()})
363+
if err != nil {
364+
return errors.Wrap(err, "failed to list resources")
365+
}
366+
367+
for _, item := range itemsGetter(list) {
368+
obj := any(&item).(client.Object)
369+
if metav1.IsControlledBy(obj, instance) {
370+
log.Info("Deleting owned resource", "kind", obj.GetObjectKind().GroupVersionKind().Kind, "name", obj.GetName())
371+
err := r.Delete(ctx, obj)
372+
if err != nil && !apierrors.IsNotFound(err) {
373+
return errors.Wrapf(err, "failed to delete %s", obj.GetName())
374+
}
375+
}
376+
}
377+
return nil
378+
}
379+
380+
func (r *OpenStackReconciler) deleteAllOwnedResources(ctx context.Context, instance *operatorv1beta1.OpenStack) error {
381+
Log := r.GetLogger(ctx)
382+
Log.Info("Deleting all owned resources for release version upgrade")
383+
384+
err := deleteOwnedResources(ctx, r, instance, &appsv1.DeploymentList{}, func(l *appsv1.DeploymentList) []appsv1.Deployment { return l.Items })
385+
if err != nil {
386+
return err
387+
}
388+
389+
err = deleteOwnedResources(ctx, r, instance, &corev1.ServiceAccountList{}, func(l *corev1.ServiceAccountList) []corev1.ServiceAccount { return l.Items })
390+
if err != nil {
391+
return err
392+
}
393+
394+
err = deleteOwnedResources(ctx, r, instance, &corev1.ServiceList{}, func(l *corev1.ServiceList) []corev1.Service { return l.Items })
395+
if err != nil {
396+
return err
397+
}
398+
399+
labelSelector, _ := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
400+
MatchLabels: map[string]string{"openstack.openstack.org/managed": "true"},
401+
})
402+
403+
deleteOpts := client.DeleteAllOfOptions{
404+
ListOptions: client.ListOptions{LabelSelector: labelSelector},
405+
}
406+
407+
err = r.Client.DeleteAllOf(ctx, &admissionv1.ValidatingWebhookConfiguration{}, &deleteOpts)
408+
if err != nil && !apierrors.IsNotFound(err) {
409+
return errors.Wrap(err, "failed to delete validating webhooks")
410+
}
411+
412+
Log.Info("All owned resources deleted successfully")
413+
return nil
414+
}
415+
319416
func (r *OpenStackReconciler) reconcileDelete(ctx context.Context, instance *operatorv1beta1.OpenStack, helper *helper.Helper) (ctrl.Result, error) {
320417
Log := r.GetLogger(ctx)
321418
Log.Info("Reconciling OpenStack initialization resource delete")
@@ -987,7 +1084,7 @@ func (r *OpenStackReconciler) postCleanupObsoleteResources(ctx context.Context,
9871084
// The horizon-operator.openstack-operators has references to old roles/bindings
9881085
// the code below will delete those references before continuing
9891086
for _, ref := range refs {
990-
refData := ref.(map[string]interface{})
1087+
refData := ref.(map[string]any)
9911088
Log.Info("Deleting operator reference", "Reference", ref)
9921089
obj := uns.Unstructured{}
9931090
obj.SetName(refData["name"].(string))

0 commit comments

Comments
 (0)