Skip to content

Commit 2ac094f

Browse files
rbaturov authored and Tal-or committed
e2e: add network policy tests
This test suite verifies that the correct NetworkPolicies are applied and enforced for the operator and its components. Specifically, it ensures: - All ingress and egress traffic is denied by default. - Egress traffic from the operator, RTE, and scheduler pods to the Kubernetes API server is allowed. - Ingress traffic to the metrics endpoints is allowed. - Ingress and egress traffic between other pods in the cluster is restricted. Due to the complexity of validating full inter-pod communication coverage, this suite includes basic but representative checks of the expected behavior. Signed-off-by: Ronny Baturov <[email protected]>
1 parent 1d92733 commit 2ac094f

File tree

2 files changed

+225
-0
lines changed

2 files changed

+225
-0
lines changed
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/*
2+
* Copyright 2025 Red Hat, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package tests
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"net"
23+
24+
corev1 "k8s.io/api/core/v1"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
"k8s.io/apimachinery/pkg/labels"
27+
"k8s.io/client-go/kubernetes"
28+
"k8s.io/klog/v2"
29+
30+
"sigs.k8s.io/controller-runtime/pkg/client"
31+
32+
e2etestenv "github.com/k8stopologyawareschedwg/resource-topology-exporter/test/e2e/utils/testenv"
33+
34+
nropv1 "github.com/openshift-kni/numaresources-operator/api/v1"
35+
"github.com/openshift-kni/numaresources-operator/internal/remoteexec"
36+
e2eclient "github.com/openshift-kni/numaresources-operator/test/internal/clients"
37+
"github.com/openshift-kni/numaresources-operator/test/internal/deploy"
38+
"github.com/openshift-kni/numaresources-operator/test/internal/objects"
39+
40+
. "github.com/onsi/ginkgo/v2"
41+
. "github.com/onsi/gomega"
42+
)
43+
44+
// This test suite verifies that the correct network policies are applied and enforced.
45+
// Specifically, it checks the following:
46+
// - All ingress and egress traffic is denied by default for the NUMAResources operator.
47+
// - Egress traffic from the operator, RTE, and scheduler pods to the Kubernetes API server is allowed.
48+
// - Ingress and egress traffic to/from other pods in the cluster is restricted.
49+
// - Ingress traffic to the metrics endpoints is allowed.
50+
//
51+
// Full coverage for inter-pod communication is challenging, so we include basic tests to validate the expected behavior.
52+
53+
var _ = Describe("network policies are applied", Ordered, Label("feature:network_policies"), func() {
54+
var namespace string
55+
var ctx context.Context
56+
var nropObj *nropv1.NUMAResourcesOperator
57+
var nroSchedObj *nropv1.NUMAResourcesScheduler
58+
var operatorPod, schedulerPod, rteWorkerPod, prometheusPod *corev1.Pod
59+
60+
BeforeAll(func() {
61+
ctx := context.Background()
62+
nropObj = objects.TestNRO()
63+
Expect(e2eclient.Client.Get(ctx, client.ObjectKeyFromObject(nropObj), nropObj)).To(Succeed())
64+
65+
nroSchedObj = objects.TestNROScheduler()
66+
Expect(e2eclient.Client.Get(ctx, client.ObjectKeyFromObject(nroSchedObj), nroSchedObj)).To(Succeed())
67+
68+
Expect(nropObj.Status.NodeGroups).ToNot(BeEmpty())
69+
namespace = nropObj.Status.NodeGroups[0].DaemonSet.Namespace
70+
71+
var err error
72+
operatorPod, err = deploy.FindNUMAResourcesOperatorPod(ctx, e2eclient.Client, nropObj)
73+
Expect(err).ToNot(HaveOccurred())
74+
75+
schedulerPod, err = deploy.FindNUMAResourcesSchedulerPod(ctx, e2eclient.Client, nroSchedObj)
76+
Expect(err).ToNot(HaveOccurred())
77+
78+
pods, err := e2eclient.K8sClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
79+
LabelSelector: fmt.Sprintf("name=%s", e2etestenv.RTELabelName),
80+
})
81+
Expect(err).ToNot(HaveOccurred())
82+
Expect(pods.Items).ToNot(BeEmpty())
83+
rteWorkerPod = &pods.Items[0]
84+
85+
prometheusPods, err := e2eclient.K8sClient.CoreV1().Pods("openshift-monitoring").List(ctx, metav1.ListOptions{
86+
LabelSelector: labels.SelectorFromSet(map[string]string{
87+
"app.kubernetes.io/name": "prometheus",
88+
}).String(),
89+
})
90+
Expect(err).ToNot(HaveOccurred())
91+
Expect(prometheusPods.Items).ToNot(BeEmpty())
92+
prometheusPod = &prometheusPods.Items[0]
93+
})
94+
95+
type trafficCase struct {
96+
FromPod func() *corev1.Pod
97+
ToHost func() string
98+
ToPort string
99+
ShouldAllow bool
100+
Description string
101+
}
102+
103+
DescribeTable("traffic behavior",
104+
func(tc trafficCase) {
105+
Expect(tc.FromPod).ToNot(BeNil(), "source pod should not be nil")
106+
klog.InfoS("Running traffic test", "description", tc.Description)
107+
reachable := trafficTest(e2eclient.K8sClient, ctx, tc.FromPod(), tc.ToHost(), tc.ToPort)
108+
klog.InfoS("Traffic test result", "reachable", reachable)
109+
Expect(reachable).To(Equal(tc.ShouldAllow), tc.Description)
110+
},
111+
// Testing operator and operands egress traffic to API server
112+
Entry("operator -> API server", trafficCase{
113+
FromPod: func() *corev1.Pod { return operatorPod },
114+
ToHost: func() string { return "$KUBERNETES_SERVICE_HOST" },
115+
ToPort: "$KUBERNETES_SERVICE_PORT",
116+
ShouldAllow: true,
117+
Description: "operator should access API server",
118+
}),
119+
Entry("scheduler -> API server", trafficCase{
120+
FromPod: func() *corev1.Pod { return schedulerPod },
121+
ToHost: func() string { return "$KUBERNETES_SERVICE_HOST" },
122+
ToPort: "$KUBERNETES_SERVICE_PORT",
123+
ShouldAllow: true,
124+
Description: "scheduler should access API server",
125+
}),
126+
Entry("rte worker -> API server", trafficCase{
127+
FromPod: func() *corev1.Pod { return rteWorkerPod },
128+
ToHost: func() string { return "$KUBERNETES_SERVICE_HOST" },
129+
ToPort: "$KUBERNETES_SERVICE_PORT",
130+
ShouldAllow: true,
131+
Description: "rte worker should access API server",
132+
}),
133+
134+
// Testing operator and RTE metrics endpoints
135+
Entry("prometheus operator -> numaresouces operator metrics endpoint", trafficCase{
136+
FromPod: func() *corev1.Pod { return prometheusPod },
137+
ToHost: func() string { return operatorPod.Status.PodIP },
138+
ToPort: "8080",
139+
ShouldAllow: true,
140+
Description: "prometheus operator pod should access numaresources operator metrics endpoint",
141+
}),
142+
143+
Entry("prometheus operator -> numaresouces rte worker endpoint", trafficCase{
144+
FromPod: func() *corev1.Pod { return prometheusPod },
145+
ToHost: func() string { return rteWorkerPod.Status.PodIP },
146+
ToPort: "2112",
147+
ShouldAllow: true,
148+
Description: "prometheus operator pod should access rte worker metrics endpoint",
149+
}),
150+
151+
// Testing traffic restrictions between pods in the numaresources namespace
152+
Entry("scheduler -> operator", trafficCase{
153+
FromPod: func() *corev1.Pod { return schedulerPod },
154+
ToHost: func() string { return operatorPod.Status.PodIP },
155+
ToPort: "8081",
156+
ShouldAllow: false,
157+
Description: "scheduler should NOT access operator",
158+
}),
159+
160+
// Testing network traffic restrictions between pods cross namespaces (numaresouces and openshift-monitoring)
161+
Entry("numaresouces operator -> prometheus operator", trafficCase{
162+
FromPod: func() *corev1.Pod { return operatorPod },
163+
ToHost: func() string { return prometheusPod.Status.PodIP },
164+
ToPort: "8081",
165+
ShouldAllow: false,
166+
Description: "numaresources operator should NOT access prometheus operator pod",
167+
}),
168+
169+
Entry("prometheus operator -> numaresouces operator", trafficCase{
170+
FromPod: func() *corev1.Pod { return prometheusPod },
171+
ToHost: func() string { return operatorPod.Status.PodIP },
172+
ToPort: "8081", // readinessProbe!
173+
ShouldAllow: false,
174+
Description: "prometheus operator pod should NOT access numaresources operator pod's readiness probe endpoint)",
175+
}),
176+
)
177+
})
178+
179+
// trafficTest returns true if the sourcePod can connect to the given destination IP and port over HTTP.
180+
// It is used to validate network connectivity (e.g., for testing networkPolicy behavior).
181+
func trafficTest(cli *kubernetes.Clientset, ctx context.Context, sourcePod *corev1.Pod, destinationIP, destinationPort string) bool {
182+
GinkgoHelper()
183+
endpoint := net.JoinHostPort(destinationIP, destinationPort)
184+
185+
key := client.ObjectKeyFromObject(sourcePod)
186+
By(fmt.Sprintf("verifying HTTP egress connectivity from pod %q to endpoint %s", key.String(), endpoint))
187+
188+
cmd := []string{"sh", "-c", fmt.Sprintf("curl --connect-timeout 5 http://%s", endpoint)}
189+
190+
stdout, stderr, err := remoteexec.CommandOnPod(ctx, cli, sourcePod, cmd...)
191+
192+
if err != nil {
193+
GinkgoWriter.Printf("curl failed: stdout=%q, stderr=%q, err=%v\n", stdout, stderr, err)
194+
}
195+
return err == nil
196+
}

test/internal/deploy/find.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,32 @@ func FindNUMAResourcesOperatorPod(ctx context.Context, cli client.Client, nrop *
6060

6161
return &podList.Items[0], nil
6262
}
63+
64+
func FindNUMAResourcesSchedulerPod(ctx context.Context, cli client.Client, nrScheduler *nropv1.NUMAResourcesScheduler) (*corev1.Pod, error) {
65+
if nrScheduler.Status.Deployment.Name == "" || nrScheduler.Status.Deployment.Namespace == "" {
66+
return nil, errors.New("scheduler deployment not reported in status")
67+
}
68+
klog.InfoS("NRO scheduler pod", "namespace", nrScheduler.Status.Deployment.Namespace)
69+
70+
sel, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
71+
MatchLabels: map[string]string{
72+
"app": "secondary-scheduler",
73+
},
74+
})
75+
if err != nil {
76+
return nil, err
77+
}
78+
klog.InfoS("NRO scheduler pod", "selector", sel.String())
79+
80+
podList := corev1.PodList{}
81+
err = cli.List(ctx, &podList, &client.ListOptions{Namespace: nrScheduler.Status.Deployment.Namespace, LabelSelector: sel})
82+
if err != nil {
83+
return nil, err
84+
}
85+
86+
if len(podList.Items) < 1 {
87+
return nil, fmt.Errorf("unexpected number of pods found: %d", len(podList.Items))
88+
}
89+
90+
return &podList.Items[0], nil
91+
}

0 commit comments

Comments
 (0)