diff --git a/Makefile b/Makefile index ff26aa24bb..b79be88be9 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ release-local: .PHONY: test-e2e test-e2e: generate - CGO_ENABLED=1 $(GO) test -timeout 1h -count 1 -v -tags e2e -run "$(TEST)" ./test/e2e + CGO_ENABLED=1 $(GO) test -timeout 1.5h -count 1 -v -tags e2e -run "$(TEST)" ./test/e2e .PHONY: test-e2e-list test-e2e-list: generate diff --git a/manifests/01-validating-admission-policy-binding.yaml b/manifests/01-validating-admission-policy-binding.yaml new file mode 100644 index 0000000000..bbe751c3bf --- /dev/null +++ b/manifests/01-validating-admission-policy-binding.yaml @@ -0,0 +1,13 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: openshift-ingress-operator-gatewayapi-crd-admission + annotations: + capability.openshift.io/name: Ingress + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + release.openshift.io/feature-set: "TechPreviewNoUpgrade,CustomNoUpgrade" +spec: + policyName: openshift-ingress-operator-gatewayapi-crd-admission + validationActions: [Deny] diff --git a/manifests/01-validating-admission-policy.yaml b/manifests/01-validating-admission-policy.yaml new file mode 100644 index 0000000000..32c5628946 --- /dev/null +++ b/manifests/01-validating-admission-policy.yaml @@ -0,0 +1,36 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: openshift-ingress-operator-gatewayapi-crd-admission + annotations: + capability.openshift.io/name: Ingress + include.release.openshift.io/ibm-cloud-managed: "true" + include.release.openshift.io/self-managed-high-availability: "true" + include.release.openshift.io/single-node-developer: "true" + release.openshift.io/feature-set: "TechPreviewNoUpgrade,CustomNoUpgrade" +spec: + matchConstraints: + # Consider only requests to CRD resources. + resourceRules: + - apiGroups: ["apiextensions.k8s.io"] + apiVersions: ["v1"] + operations: ["CREATE","UPDATE","DELETE"] + resources: ["customresourcedefinitions"] + matchConditions: + # Consider only request to Gateway API CRDs. + - name: "check-only-gateway-api-crds" + # When the operation is DELETE, the "object" variable is null. + expression: "(request.operation == 'DELETE' ? oldObject : object).spec.group == 'gateway.networking.k8s.io'" + # Validations are evaluated in the the order of their declaration. + validations: + # Verify that the request was sent by the ingress operator's service account. + - expression: "has(request.userInfo.username) && (request.userInfo.username == 'system:serviceaccount:openshift-ingress-operator:ingress-operator')" + message: "Gateway API Custom Resource Definitions are managed by the Ingress Operator and may not be modified" + reason: Forbidden + # Verify that the request was sent from a pod. The presence of both "node" and "pod" claims implies that the token is bound to a pod object. + # Ref: https://kubernetes.io/docs/reference/access-authn-authz/service-accounts-admin/#schema-for-service-account-private-claims. + - expression: "has(request.userInfo.extra) && ('authentication.kubernetes.io/node-name' in request.userInfo.extra) && ('authentication.kubernetes.io/pod-name' in request.userInfo.extra)" + message: "this user must have both \"authentication.kubernetes.io/node-name\" and \"authentication.kubernetes.io/pod-name\" claims" + reason: Forbidden + # Fail the admission if any validation evaluates to false. + failurePolicy: Fail diff --git a/test/e2e/all_test.go b/test/e2e/all_test.go index 1f4e3a1652..2d499afd3b 100644 --- a/test/e2e/all_test.go +++ b/test/e2e/all_test.go @@ -83,7 +83,6 @@ func TestAll(t *testing.T) { t.Run("TestSetRouteResponseHeaders", TestSetRouteResponseHeaders) t.Run("TestReconcileInternalService", TestReconcileInternalService) t.Run("TestConnectTimeout", TestConnectTimeout) - t.Run("TestGatewayAPI", TestGatewayAPI) t.Run("TestAWSLBSubnets", TestAWSLBSubnets) t.Run("TestUnmanagedAWSLBSubnets", TestUnmanagedAWSLBSubnets) t.Run("TestAWSEIPAllocationsForNLB", TestAWSEIPAllocationsForNLB) @@ -126,5 +125,16 @@ func TestAll(t *testing.T) { t.Run("TestRouteHardStopAfterEnableOnIngressControllerHasPriorityOverIngressConfig", TestRouteHardStopAfterEnableOnIngressControllerHasPriorityOverIngressConfig) t.Run("TestHostNetworkPortBinding", TestHostNetworkPortBinding) t.Run("TestDashboardCreation", TestDashboardCreation) + // TestGatewayAPI creates a test ServiceMeshControlPlane (SMCP) resource, + // which triggers the creation of a mutating webhook for all pods in the cluster. + // This introduces a race condition where any pod creation request between + // the webhook's creation and the SMCP pod becoming ready can fail with: + // + // failed calling webhook "sidecar-injector.istio.io": failed to call webhook: + // no endpoints available for service "istiod-openshift-gateway" + // + // Serializing the test ensures it runs in isolation with other tests, + // preventing any impact of the mutating webhook on pod creation in the cluster + t.Run("TestGatewayAPI", TestGatewayAPI) }) } diff --git a/test/e2e/gateway_api_test.go b/test/e2e/gateway_api_test.go index 0b1f99da7f..d07fa3c03c 100644 --- a/test/e2e/gateway_api_test.go +++ b/test/e2e/gateway_api_test.go @@ -10,14 +10,20 @@ import ( "testing" "github.com/openshift/api/features" + operatorclient "github.com/openshift/cluster-ingress-operator/pkg/operator/client" operatorcontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller" "github.com/openshift/cluster-ingress-operator/pkg/operator/controller/gatewayclass" corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apiserver/pkg/storage/names" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/config" gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1" ) @@ -30,6 +36,8 @@ const ( expectedCatalogSourceNamespace = "openshift-marketplace" // The test gateway name used in multiple places. testGatewayName = "test-gateway" + // gwapiCRDVAPName is the name of the ingress operator's Validating Admission Policy (VAP). + gwapiCRDVAPName = "openshift-ingress-operator-gatewayapi-crd-admission" ) var crdNames = []string{ @@ -51,8 +59,6 @@ var defaultRoutename = "" // feature gate is still in effect, preface the test names with "TestGatewayAPI" // so that they run via the openshift/release test configuration. func TestGatewayAPI(t *testing.T) { - t.Parallel() - // Skip if feature is not enabled if gatewayAPIEnabled, err := isFeatureGateEnabled(features.FeatureGateGatewayAPI); err != nil { t.Fatalf("error checking feature gate enabled status: %v", err) @@ -84,6 +90,7 @@ func TestGatewayAPI(t *testing.T) { } else { t.Log("Gateway API Controller not enabled, skipping testGatewayAPIObjects and testGatewayAPIIstioInstallation") } + t.Run("testGatewayAPIResourcesProtection", testGatewayAPIResourcesProtection) } // testGatewayAPIResources tests that Gateway API Custom Resource Definitions are available. @@ -167,6 +174,93 @@ func testGatewayAPIObjects(t *testing.T) { } } +// testGatewayAPIResourcesProtection verifies that the ingress operator's Validating Admission Policy +// denies admission requests attempting to modify Gateway API CRDs on behalf of a user +// who is not the ingress operator's service account. +func testGatewayAPIResourcesProtection(t *testing.T) { + t.Helper() + + // Get kube client which impersonates ingress operator's service account. + kubeConfig, err := config.GetConfig() + if err != nil { + t.Fatalf("failed to get kube config: %v", err) + } + kubeConfig.Impersonate = rest.ImpersonationConfig{ + UserName: "system:serviceaccount:openshift-ingress-operator:ingress-operator", + } + kubeClient, err := operatorclient.NewClient(kubeConfig) + if err != nil { + t.Fatalf("failed to to create kube client: %v", err) + } + + // Create test CRDs. + var testCRDs []*apiextensionsv1.CustomResourceDefinition + for _, name := range crdNames { + testCRDs = append(testCRDs, buildGWAPICRDFromName(name)) + } + + testCases := []struct { + name string + kclient client.Client + expectedErrMsg string + }{ + { + name: "Ingress operator service account required", + kclient: kclient, + expectedErrMsg: "Gateway API Custom Resource Definitions are managed by the Ingress Operator and may not be modified", + }, + { + name: "Pod binding required", + kclient: kubeClient, + expectedErrMsg: "this user must have both \"authentication.kubernetes.io/node-name\" and \"authentication.kubernetes.io/pod-name\" claims", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Verify that GatewayAPI CRD creation is forbidden. + for i := range testCRDs { + if err := tc.kclient.Create(context.Background(), testCRDs[i]); err != nil { + if !strings.Contains(err.Error(), tc.expectedErrMsg) { + t.Errorf("unexpected error received while creating CRD %q: %v", testCRDs[i].Name, err) + } + } else { + t.Errorf("admission error is expected while creating CRD %q but not received", testCRDs[i].Name) + } + } + + // Verify that GatewayAPI CRD update is forbidden. + for i := range testCRDs { + crdName := types.NamespacedName{Name: testCRDs[i].Name} + crd := &apiextensionsv1.CustomResourceDefinition{} + if err := tc.kclient.Get(context.Background(), crdName, crd); err != nil { + t.Errorf("failed to get %q CRD: %v", crdName.Name, err) + continue + } + crd.Spec = testCRDs[i].Spec + if err := tc.kclient.Update(context.Background(), crd); err != nil { + if !strings.Contains(err.Error(), tc.expectedErrMsg) { + t.Errorf("unexpected error received while updating CRD %q: %v", testCRDs[i].Name, err) + } + } else { + t.Errorf("admission error is expected while updating CRD %q but not received", testCRDs[i].Name) + } + } + + // Verify that GatewayAPI CRD deletion is forbidden. + for i := range testCRDs { + if err := tc.kclient.Delete(context.Background(), testCRDs[i]); err != nil { + if !strings.Contains(err.Error(), tc.expectedErrMsg) { + t.Errorf("unexpected error received while deleting CRD %q: %v", testCRDs[i].Name, err) + } + } else { + t.Errorf("admission error is expected while deleting CRD %q but not received", testCRDs[i].Name) + } + } + }) + } +} + // ensureCRDs tests that the Gateway API custom resource definitions exist. func ensureCRDs(t *testing.T) { t.Helper() @@ -182,6 +276,19 @@ func ensureCRDs(t *testing.T) { // deleteCRDs deletes Gateway API custom resource definitions. func deleteCRDs(t *testing.T) { t.Helper() + + vm := newVAPManager(t, gwapiCRDVAPName) + // Remove the ingress operator's Validating Admission Policy (VAP) + // which prevents modifications of Gateway API CRDs + // by anything other than the ingress operator. + if err, recoverFn := vm.disable(); err != nil { + defer recoverFn() + t.Fatalf("failed to disable vap: %v", err) + } + // Put back the VAP to ensure that it does not prevent + // the ingress operator from managing Gateway API CRDs. + defer vm.enable() + for _, crdName := range crdNames { err := deleteExistingCRD(t, crdName) if err != nil { diff --git a/test/e2e/util_gatewayapi_test.go b/test/e2e/util_gatewayapi_test.go index f999cebb85..7ca8464f27 100644 --- a/test/e2e/util_gatewayapi_test.go +++ b/test/e2e/util_gatewayapi_test.go @@ -19,6 +19,7 @@ import ( operatorcontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller" operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -43,6 +44,10 @@ const ( openshiftIstiodDeploymentName = "istiod-openshift-gateway" // openshiftSMCPName holds the expected OSSM ServiceMeshControlPlane name openshiftSMCPName = "openshift-gateway" + // cvoNamespace is the namespace of cluster version operator (CVO). + cvoNamespace = "openshift-cluster-version" + // cvoDeploymentName is the name of cluster version operator's deployment. + cvoDeploymentName = "cluster-version-operator" ) // updateIngressOperatorRole updates the ingress-operator cluster role with cluster-admin privilege. @@ -149,6 +154,53 @@ func deleteExistingCRD(t *testing.T, crdName string) error { return nil } +// deleteExistingVAP deletes if the ValidatingAdmissionPolicy of the given name exists and returns an error if not. +func deleteExistingVAP(t *testing.T, vapName string) error { + t.Helper() + + vap := &admissionregistrationv1.ValidatingAdmissionPolicy{} + newVAP := &admissionregistrationv1.ValidatingAdmissionPolicy{} + name := types.NamespacedName{Name: vapName} + + // Retrieve the object to be deleted. + if err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 30*time.Second, false, func(context context.Context) (bool, error) { + if err := kclient.Get(context, name, vap); err != nil { + t.Logf("failed to get vap %q: %v, retrying ...", vapName, err) + return false, nil + } + return true, nil + }); err != nil { + return fmt.Errorf("failed to get vap %q: %w", vapName, err) + } + + if err := kclient.Delete(context.Background(), vap); err != nil { + return fmt.Errorf("failed to delete vap %q: %w", vapName, err) + } + + // Verify VAP was not recreated. + if err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 30*time.Second, false, func(ctx context.Context) (bool, error) { + if err := kclient.Get(ctx, name, newVAP); err != nil { + if kerrors.IsNotFound(err) { + // VAP does not exist as expected. + return true, nil + } + t.Logf("failed to get vap %q: %v, retrying ...", vapName, err) + return false, nil + } + // Check if new VAP got recreated. + if newVAP != nil && newVAP.UID != vap.UID { + return true, fmt.Errorf("vap %q got recreated", vapName) + } + t.Logf("vap %q still exists, retrying ...", vapName) + return false, nil + }); err != nil { + return fmt.Errorf("failed to verify deletion of vap %q: %v", vapName, err) + } + + t.Logf("deleted vap %q", vapName) + return nil +} + // createHttpRoute checks if the HTTPRoute can be created. // If it can't an error is returned. func createHttpRoute(namespace, routeName, parentNamespace, hostname, backendRefname string, gateway *gatewayapiv1.Gateway) (*gatewayapiv1.HTTPRoute, error) { @@ -272,6 +324,68 @@ func buildHTTPRoute(routeName, namespace, parentgateway, parentNamespace, hostna } } +// buildGWAPICRDFromName initializes the GatewayAPI CRD deducing most of its required fields from the given name. +func buildGWAPICRDFromName(name string) *apiextensionsv1.CustomResourceDefinition { + var ( + plural = strings.Split(name, ".")[0] + group, _ = strings.CutPrefix(name, plural+".") + scope = apiextensionsv1.NamespaceScoped + // removing trailing "s" + singular = plural[0 : len(plural)-1] + versions = []map[string]bool{{"v1": true /*storage version*/}, {"v1beta1": false}} + kind string + ) + + switch plural { + case "gatewayclasses": + singular = "gatewayclass" + kind = "GatewayClass" + scope = apiextensionsv1.ClusterScoped + case "gateways": + kind = "Gateway" + case "httproutes": + kind = "HTTPRoute" + case "referencegrants": + kind = "ReferenceGrant" + versions = []map[string]bool{{"v1beta1": true}} + } + + crd := &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: plural + "." + group, + Annotations: map[string]string{ + "api-approved.kubernetes.io": "https://github.com/kubernetes-sigs/gateway-api/pull/2466", + }, + }, + Spec: apiextensionsv1.CustomResourceDefinitionSpec{ + Group: group, + Names: apiextensionsv1.CustomResourceDefinitionNames{ + Singular: singular, + Plural: plural, + Kind: kind, + }, + Scope: scope, + }, + } + + for _, v := range versions { + for name, storage := range v { + crd.Spec.Versions = append(crd.Spec.Versions, apiextensionsv1.CustomResourceDefinitionVersion{ + Name: name, + Storage: storage, + Served: true, + Schema: &apiextensionsv1.CustomResourceValidation{ + OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{ + Type: "object", + }, + }, + }) + } + } + + return crd +} + // assertSubscription checks if the Subscription of the given name exists and returns an error if not. func assertSubscription(t *testing.T, namespace, subName string) error { t.Helper() @@ -741,3 +855,83 @@ func assertDNSRecord(t *testing.T, recordName types.NamespacedName) error { }) return err } + +// assertVAP checks if the ValidatingAdmissionPolicy of the given name exists, and returns an error if not. +func assertVAP(t *testing.T, name string) error { + t.Helper() + vap := &admissionregistrationv1.ValidatingAdmissionPolicy{} + return wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 1*time.Minute, false, func(context context.Context) (bool, error) { + if err := kclient.Get(context, types.NamespacedName{Name: name}, vap); err != nil { + t.Logf("failed to get vap %q: %v, retrying...", name, err) + return false, nil + } + t.Logf("found vap %q", name) + return true, nil + }) +} + +// scaleDeployment scales the deployment with the given name to the specified number of replicas. +func scaleDeployment(t *testing.T, namespace, name string, replicas int32) error { + t.Helper() + + nsName := types.NamespacedName{Namespace: namespace, Name: name} + return wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 30*time.Second, false, func(context context.Context) (bool, error) { + depl := &appsv1.Deployment{} + if err := kclient.Get(context, nsName, depl); err != nil { + t.Logf("failed to get deployment %q: %v, retrying...", nsName, err) + return false, nil + } + if *depl.Spec.Replicas != replicas { + depl.Spec.Replicas = &replicas + if err := kclient.Update(context, depl); err != nil { + t.Logf("failed to update deployment %q: %v, retrying...", nsName, err) + return false, nil + } + t.Logf("scaled deployment %q to %d replica(s)", nsName, replicas) + } + if depl.Status.AvailableReplicas != replicas { + t.Logf("deployment %q expected to have %d available replica(s) but got %d, retrying...", nsName, replicas, depl.Status.AvailableReplicas) + return false, nil + } + t.Logf("deployment %q has %d available replica(s)", nsName, replicas) + return true, nil + }) +} + +// vapManager helps to disable the VAP resource which is managed by CVO. +type vapManager struct { + t *testing.T + name string +} + +// newVAPManager returns a new instance of VAPManager. +func newVAPManager(t *testing.T, vapName string) *vapManager { + return &vapManager{ + t: t, + name: vapName, + } +} + +// disable scales down CVO and removes the VAP resource. +func (m *vapManager) disable() (error, func()) { + if err := scaleDeployment(m.t, cvoNamespace, cvoDeploymentName, 0); err != nil { + return fmt.Errorf("failed to scale down cvo: %w", err), func() { /*scale down didn't work, nothing to do*/ } + } + if err := deleteExistingVAP(m.t, m.name); err != nil { + return fmt.Errorf("failed to delete vap %q: %w", m.name, err), func() { + if err := scaleDeployment(m.t, cvoNamespace, cvoDeploymentName, 1); err != nil { + m.t.Errorf("failed to scale up cvo: %v", err) + } + } + } + return nil, nil +} + +// Enable scales up CVO and waits until the VAP is recreated. +func (m *vapManager) enable() { + if err := scaleDeployment(m.t, cvoNamespace, cvoDeploymentName, 1); err != nil { + m.t.Errorf("failed to scale up cvo: %v", err) + } else if err := assertVAP(m.t, m.name); err != nil { + m.t.Errorf("failed to find vap %q: %v", m.name, err) + } +}