From c22c21d56ad6f0169d72f3890d6c17ab58c7a44f Mon Sep 17 00:00:00 2001 From: Ali Syed Date: Fri, 7 Mar 2025 19:50:11 +0000 Subject: [PATCH] NE-1951: Pre-upgrade Admin Gate for Gateway API CRD Management Succession This PR introduces a pre-upgrade admin gate for Gateway API CRD management succession. It implements an operator which, upon the detection of any Gateway API CRDs will apply an admin gate to inform the cluster admin. There is also an e2e implemented and a conditional check for the operator to skip if the featuregate is enabled. This ensures to block upgrades until a cluster admin explicitly provides consent for the platform to start taking over management of the Gateway API CRDs --- .../gatewayapi-upgradeable/controller.go | 164 ++++++++++++++++++ pkg/operator/controller/names.go | 7 + pkg/operator/operator.go | 9 +- test/e2e/gatewayapi_upgradeable_test.go | 47 ++++- 4 files changed, 220 insertions(+), 7 deletions(-) create mode 100644 pkg/operator/controller/gatewayapi-upgradeable/controller.go diff --git a/pkg/operator/controller/gatewayapi-upgradeable/controller.go b/pkg/operator/controller/gatewayapi-upgradeable/controller.go new file mode 100644 index 0000000000..c3cc26fc03 --- /dev/null +++ b/pkg/operator/controller/gatewayapi-upgradeable/controller.go @@ -0,0 +1,164 @@ +package gatewayapi_upgradeable + +import ( + "context" + "fmt" + + logf "github.com/openshift/cluster-ingress-operator/pkg/log" + operatorcontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller" + + corev1 "k8s.io/api/core/v1" + + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + + "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1" +) + +const ( + controllerName = "gatewayapi_upgradeable_controller" + gatewayAPIAdminKey = "ack-4.18-gateway-api-management-in-4.19" + gatewayAPIAdminMsg = "Gateway API CRDs have been detected. OCP fully manages the life-cycle of Gateway API CRDs. External management is unsupported and will be prevented. The cluster administrator is responsible for the safety of existing Gateway API implementations and must acknowledge their responsibilities via the admin gate to proceed with upgrades. See https://docs.redhat.com/en/documentation/openshift_container_platform/4.19/html/release_notes/ocp-4-19-release-notes#ocp-4-19-networking-gateway-api-crd-lifecycle_release-notes for details. Failure to read and understand the documentation for this and the implications can result in outages and data loss." +) + +var ( + log = logf.Logger.WithName(controllerName) +) + +// The New function initializes the controller and sets up the watch for the ConfigMap. +func New(mgr manager.Manager) (controller.Controller, error) { + c, err := controller.New(controllerName, mgr, controller.Options{ + Reconciler: &reconciler{ + client: mgr.GetClient(), + cache: mgr.GetCache(), + }, + }) + if err != nil { + return nil, err + } + + // Mapping the events to the admin gate ConfigMap. + toAdminGatesConfigMap := func(_ context.Context, _ client.Object) []reconcile.Request { + return []reconcile.Request{{ + NamespacedName: operatorcontroller.AdminGatesConfigMapName(), + }} + } + + // Defining the CRD predicate. + crdPredicate := predicate.NewPredicateFuncs(func(o client.Object) bool { + group := o.(*apiextensionsv1.CustomResourceDefinition).Spec.Group + return group == gatewayapiv1.GroupName || group == "gateway.networking.x-k8s.io" + }) + + // Setting up a watch for CRD events. + if err := c.Watch(source.Kind[client.Object](mgr.GetCache(), &apiextensionsv1.CustomResourceDefinition{}, handler.EnqueueRequestsFromMapFunc(toAdminGatesConfigMap), crdPredicate)); err != nil { + return nil, err + } + + // A predicate filter to watch for specific changes in the ConfigMap. + // Verify that the ConfigMap's name and namespace match the expected values. + adminGatePredicate := predicate.NewPredicateFuncs(func(o client.Object) bool { + return o.GetNamespace() == operatorcontroller.AdminGatesConfigMapName().Namespace && + o.GetName() == operatorcontroller.AdminGatesConfigMapName().Name + }) + + if err := c.Watch(source.Kind[client.Object](mgr.GetCache(), &corev1.ConfigMap{}, &handler.EnqueueRequestForObject{}, adminGatePredicate)); err != nil { + return nil, err + } + + return c, nil +} + +// reconciler struct holds the client and cache attributes. +type reconciler struct { + client client.Client + cache cache.Cache +} + +// Reconcile function implements the logic to check conditions and manage the admin gate. +func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + log.Info("reconciling", "request", request) + + adminGateConditionExists, err := r.adminGateConditionExists(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to determine if admin gate condition exists: %w", err) + } + + if adminGateConditionExists { + if err := r.addAdminGate(ctx); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to add admin gate: %w", err) + } + } else { + if err := r.removeAdminGate(ctx); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to remove admin gate: %w", err) + } + } + + return reconcile.Result{}, nil +} + +// adminGateConditionExists checks if the admin gate condition exists based on both ConfigMap and CRDs. +func (r *reconciler) adminGateConditionExists(ctx context.Context) (bool, error) { + crds := &apiextensionsv1.CustomResourceDefinitionList{} + if err := r.cache.List(ctx, crds); err != nil { + return false, fmt.Errorf("failed to list CRDs: %w", err) + } + + for _, crd := range crds.Items { + if crd.Spec.Group == gatewayapiv1.GroupName || crd.Spec.Group == "gateway.networking.x-k8s.io" { + return true, nil + } + } + + return false, nil +} + +// The addAdminGate function is responsible for adding the admin gate to the ConfigMap. +func (r *reconciler) addAdminGate(ctx context.Context) error { + adminGatesConfigMap := &corev1.ConfigMap{} + if err := r.cache.Get(ctx, operatorcontroller.AdminGatesConfigMapName(), adminGatesConfigMap); err != nil { + return fmt.Errorf("failed to get configmap %s: %w", operatorcontroller.AdminGatesConfigMapName(), err) + } + + if adminGatesConfigMap.Data == nil { + adminGatesConfigMap.Data = map[string]string{} + } + + // The function checks if the admin key exists and if it is set to the expected message. + if val, ok := adminGatesConfigMap.Data[gatewayAPIAdminKey]; ok && val == gatewayAPIAdminMsg { + return nil + } + adminGatesConfigMap.Data[gatewayAPIAdminKey] = gatewayAPIAdminMsg + + log.Info("Adding admin gate for Gateway API management") + if err := r.client.Update(ctx, adminGatesConfigMap); err != nil { + return fmt.Errorf("failed to update configmap %s: %w", operatorcontroller.AdminGatesConfigMapName(), err) + } + return nil +} + +// The removeAdminGate function is responsible for removing the admin gate from the ConfigMap. +func (r *reconciler) removeAdminGate(ctx context.Context) error { + adminGatesConfigMap := &corev1.ConfigMap{} + if err := r.cache.Get(ctx, operatorcontroller.AdminGatesConfigMapName(), adminGatesConfigMap); err != nil { + return fmt.Errorf("failed to get configmap %s: %w", operatorcontroller.AdminGatesConfigMapName(), err) + } + + if _, ok := adminGatesConfigMap.Data[gatewayAPIAdminKey]; !ok { + return nil + } + + log.Info("Removing admin gate for Gateway API management") + delete(adminGatesConfigMap.Data, gatewayAPIAdminKey) + if err := r.client.Update(ctx, adminGatesConfigMap); err != nil { + return fmt.Errorf("failed to update configmap %s: %w", operatorcontroller.AdminGatesConfigMapName(), err) + } + return nil +} diff --git a/pkg/operator/controller/names.go b/pkg/operator/controller/names.go index 27aceaf730..313fa853d0 100644 --- a/pkg/operator/controller/names.go +++ b/pkg/operator/controller/names.go @@ -49,6 +49,13 @@ const ( RemoteWorkerLabel = "node.openshift.io/remote-worker" ) +func AdminGatesConfigMapName() types.NamespacedName { + return types.NamespacedName{ + Name: "admin-gates", + Namespace: GlobalMachineSpecifiedConfigNamespace, + } +} + // IngressClusterOperatorName returns the namespaced name of the ClusterOperator // resource for the operator. func IngressClusterOperatorName() types.NamespacedName { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 71c937e298..ab486058c8 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -35,13 +35,13 @@ import ( dnscontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/dns" gatewayservicednscontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/gateway-service-dns" gatewayapicontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/gatewayapi" + gatewayapi_upgradeable "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/gatewayapi-upgradeable" gatewayclasscontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/gatewayclass" ingress "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/ingress" ingresscontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/ingress" ingressclasscontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/ingressclass" statuscontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/status" "github.com/openshift/library-go/pkg/operator/events" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -320,6 +320,13 @@ func New(config operatorconfig.Config, kubeConfig *rest.Config) (*Operator, erro return nil, fmt.Errorf("failed to create gatewayapi controller: %w", err) } + // Add conditional setup for gateway_upgradeable controller only if FeatureGate is not enabled + if !gatewayAPIEnabled { + if _, err := gatewayapi_upgradeable.New(mgr); err != nil { + return nil, fmt.Errorf("failed to create gatewayapi upgradeable controller: %w", err) + } + } + return &Operator{ manager: mgr, // TODO: These are only needed for the default ingress controller stuff, which diff --git a/test/e2e/gatewayapi_upgradeable_test.go b/test/e2e/gatewayapi_upgradeable_test.go index 8b4a209651..92bd48667c 100644 --- a/test/e2e/gatewayapi_upgradeable_test.go +++ b/test/e2e/gatewayapi_upgradeable_test.go @@ -6,15 +6,18 @@ package e2e import ( "context" "testing" - - apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "time" configv1 "github.com/openshift/api/config/v1" "github.com/openshift/api/features" "github.com/openshift/cluster-ingress-operator/pkg/manifests" test_crds "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/test/crds" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" ) var expectedCRDs = []*apiextensionsv1.CustomResourceDefinition{ @@ -30,21 +33,35 @@ var incompatibleCRDs = []*apiextensionsv1.CustomResourceDefinition{ test_crds.TCPRouteCRD_experimental_v1(), } +// TestGatewayAPIUpgradeable verifies the operator's upgradeable condition and admin gate behavior +// when the Gateway API feature gate is disabled. func TestGatewayAPIUpgradeable(t *testing.T) { + t.Parallel() if gatewayAPIEnabled, err := isFeatureGateEnabled(features.FeatureGateGatewayAPI); err != nil { t.Fatalf("error checking feature gate enabled status: %v", err) } else if gatewayAPIEnabled { t.Skip("Gateway API is enabled, skipping TestGatewayAPIUpgradeable") } - t.Parallel() - - defer deleteExistingCRDs(t, append(expectedCRDs, incompatibleCRDs...)) + t.Cleanup(func() { + for _, crd := range append(expectedCRDs, incompatibleCRDs...) { + if err := kclient.Delete(context.TODO(), crd); err != nil { + if errors.IsNotFound(err) { + continue + } + t.Errorf("failed to delete crd %q: %v", crd.Name, err) + } + } + }) createCRDs(t, expectedCRDs) + testAdminGate(t, true) testOperatorUpgradeableCondition(t, true) createCRDs(t, incompatibleCRDs) testOperatorUpgradeableCondition(t, false) + deleteExistingCRDs(t, append(expectedCRDs, incompatibleCRDs...)) + testAdminGate(t, false) + testOperatorUpgradeableCondition(t, true) } func testOperatorUpgradeableCondition(t *testing.T, expectUpgradeable bool) { @@ -69,7 +86,6 @@ func createCRDs(t *testing.T, crds []*apiextensionsv1.CustomResourceDefinition) if !errors.IsAlreadyExists(err) { t.Fatalf("Failed to create CRD %s: %v", crd.Name, err) } - continue } } } @@ -80,3 +96,22 @@ func deleteExistingCRDs(t *testing.T, crds []*apiextensionsv1.CustomResourceDefi deleteExistingCRD(t, crd.Name) } } + +func testAdminGate(t *testing.T, shouldExist bool) { + t.Helper() + adminGatesConfigMap := &corev1.ConfigMap{} + if err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 30*time.Second, false, func(ctx context.Context) (bool, error) { + if err := kclient.Get(ctx, client.ObjectKey{Namespace: "openshift-config-managed", Name: "admin-gates"}, adminGatesConfigMap); err != nil { + t.Logf("Failed to get configmap admin-gates: %v, retrying...", err) + return false, nil + } + return true, nil + }); err != nil { + t.Fatalf("Timed out trying to get admin-gates configmap: %v", err) + } + + _, adminGateKeyExists := adminGatesConfigMap.Data["ack-4.18-gateway-api-management-in-4.19"] + if adminGateKeyExists != shouldExist { + t.Fatalf("Expected admin gate key existence to be %v, but got %v", shouldExist, adminGateKeyExists) + } +}