Skip to content

Commit e798833

Browse files
rbaturov authored and Tal-or committed
e2e: add network policy tests
This test suite verifies that the correct NetworkPolicies are applied and enforced for the operator and its components. Specifically, it ensures:
- All ingress and egress traffic is denied by default.
- Egress traffic from the operator, RTE, and scheduler pods to the Kubernetes API server is allowed.
- Ingress traffic to the metrics endpoints is allowed.
- Ingress and egress traffic between other pods in the cluster is restricted.

Due to the complexity of validating full inter-pod communication coverage, this suite includes basic but representative checks of the expected behavior.

Signed-off-by: Ronny Baturov <[email protected]>
1 parent 2039347 commit e798833

File tree

2 files changed

+227
-0
lines changed

2 files changed

+227
-0
lines changed
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/*
2+
* Copyright 2025 Red Hat, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package tests
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"net"
23+
24+
corev1 "k8s.io/api/core/v1"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
"k8s.io/apimachinery/pkg/labels"
27+
"k8s.io/client-go/kubernetes"
28+
"k8s.io/klog/v2"
29+
"sigs.k8s.io/controller-runtime/pkg/client"
30+
31+
. "github.com/onsi/ginkgo/v2"
32+
. "github.com/onsi/gomega"
33+
34+
e2etestenv "github.com/k8stopologyawareschedwg/resource-topology-exporter/test/e2e/utils/testenv"
35+
nropv1 "github.com/openshift-kni/numaresources-operator/api/v1"
36+
"github.com/openshift-kni/numaresources-operator/internal/remoteexec"
37+
e2eclient "github.com/openshift-kni/numaresources-operator/test/internal/clients"
38+
"github.com/openshift-kni/numaresources-operator/test/internal/deploy"
39+
"github.com/openshift-kni/numaresources-operator/test/internal/objects"
40+
)
41+
42+
// This test suite verifies that the correct network policies are applied and enforced.
43+
// Specifically, it checks the following:
44+
// - All ingress and egress traffic is denied by default for the NUMAResources operator.
45+
// - Egress traffic from the operator, RTE, and scheduler pods to the Kubernetes API server is allowed.
46+
// - Ingress and egress traffic to/from other pods in the cluster is restricted.
47+
// - Ingress traffic to the metrics endpoints is allowed.
48+
//
49+
// Full coverage for inter-pod communication is challenging, so we include basic tests to validate the expected behavior.
50+
51+
var _ = Describe("network policies are applied", Ordered, Label("feature:network_policies"), func() {
52+
ctx := context.Background()
53+
var namespace string
54+
var nropObj *nropv1.NUMAResourcesOperator
55+
var nroSchedObj *nropv1.NUMAResourcesScheduler
56+
var operatorPod, schedulerPod, rteWorkerPod, prometheusPod *corev1.Pod
57+
58+
BeforeAll(func() {
59+
nropObj = objects.TestNRO()
60+
Expect(e2eclient.Client.Get(ctx, client.ObjectKeyFromObject(nropObj), nropObj)).To(Succeed())
61+
62+
nroSchedObj = objects.TestNROScheduler()
63+
Expect(e2eclient.Client.Get(ctx, client.ObjectKeyFromObject(nroSchedObj), nroSchedObj)).To(Succeed())
64+
65+
Expect(nropObj.Status.NodeGroups).ToNot(BeEmpty())
66+
namespace = nropObj.Status.NodeGroups[0].DaemonSet.Namespace
67+
68+
var err error
69+
operatorPod, err = deploy.FindNUMAResourcesOperatorPod(ctx, e2eclient.Client, nropObj)
70+
Expect(err).ToNot(HaveOccurred())
71+
72+
schedulerPod, err = deploy.FindNUMAResourcesSchedulerPod(ctx, e2eclient.Client, nroSchedObj)
73+
Expect(err).ToNot(HaveOccurred())
74+
75+
pods, err := e2eclient.K8sClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
76+
LabelSelector: fmt.Sprintf("name=%s", e2etestenv.RTELabelName),
77+
})
78+
Expect(err).ToNot(HaveOccurred())
79+
Expect(pods.Items).ToNot(BeEmpty())
80+
rteWorkerPod = &pods.Items[0]
81+
82+
prometheusPods, err := e2eclient.K8sClient.CoreV1().Pods("openshift-monitoring").List(ctx, metav1.ListOptions{
83+
LabelSelector: labels.SelectorFromSet(map[string]string{
84+
"app.kubernetes.io/name": "prometheus",
85+
}).String(),
86+
})
87+
Expect(err).ToNot(HaveOccurred())
88+
Expect(prometheusPods.Items).ToNot(BeEmpty())
89+
prometheusPod = &prometheusPods.Items[0]
90+
})
91+
92+
type trafficCase struct {
93+
FromPod func() *corev1.Pod
94+
ToHost func() string
95+
ToPort string
96+
ShouldAllow bool
97+
Description string
98+
}
99+
100+
DescribeTable("traffic behavior",
101+
func(tc trafficCase) {
102+
Expect(tc.FromPod).ToNot(BeNil(), "source pod should not be nil")
103+
klog.InfoS("Running traffic test", "description", tc.Description)
104+
reachable := trafficTest(e2eclient.K8sClient, ctx, tc.FromPod(), tc.ToHost(), tc.ToPort)
105+
klog.InfoS("reachable", "reachable", reachable)
106+
107+
if tc.ShouldAllow {
108+
Expect(reachable).To(BeTrue(), tc.Description)
109+
} else {
110+
Expect(reachable).To(BeFalse(), tc.Description)
111+
}
112+
},
113+
// Testing operator and operands egress traffic to API server
114+
Entry("operator -> API server", trafficCase{
115+
FromPod: func() *corev1.Pod { return operatorPod },
116+
ToHost: func() string { return "$KUBERNETES_SERVICE_HOST" },
117+
ToPort: "$KUBERNETES_SERVICE_PORT",
118+
ShouldAllow: true,
119+
Description: "operator should access API server",
120+
}),
121+
Entry("scheduler -> API server", trafficCase{
122+
FromPod: func() *corev1.Pod { return schedulerPod },
123+
ToHost: func() string { return "$KUBERNETES_SERVICE_HOST" },
124+
ToPort: "$KUBERNETES_SERVICE_PORT",
125+
ShouldAllow: true,
126+
Description: "scheduler should access API server",
127+
}),
128+
Entry("rte worker -> API server", trafficCase{
129+
FromPod: func() *corev1.Pod { return rteWorkerPod },
130+
ToHost: func() string { return "$KUBERNETES_SERVICE_HOST" },
131+
ToPort: "$KUBERNETES_SERVICE_PORT",
132+
ShouldAllow: true,
133+
Description: "rte worker should access API server",
134+
}),
135+
136+
// Testing operator and RTE metrics endpoints
137+
Entry("prometheus operator -> numaresouces operator metrics endpoint", trafficCase{
138+
FromPod: func() *corev1.Pod { return prometheusPod },
139+
ToHost: func() string { return operatorPod.Status.PodIP },
140+
ToPort: "8080",
141+
ShouldAllow: true,
142+
Description: "prometheus operator pod should access numaresources operator metrics endpoint",
143+
}),
144+
145+
Entry("prometheus operator -> numaresouces rte worker endpoint", trafficCase{
146+
FromPod: func() *corev1.Pod { return prometheusPod },
147+
ToHost: func() string { return rteWorkerPod.Status.PodIP },
148+
ToPort: "2112",
149+
ShouldAllow: true,
150+
Description: "prometheus operator pod should access rte worker metrics endpoint",
151+
}),
152+
153+
// Testing traffic restrictions between pods in the numaresources namespace
154+
Entry("scheduler -> operator", trafficCase{
155+
FromPod: func() *corev1.Pod { return schedulerPod },
156+
ToHost: func() string { return operatorPod.Status.PodIP },
157+
ToPort: "8081",
158+
ShouldAllow: false,
159+
Description: "scheduler should NOT access operator",
160+
}),
161+
162+
// Testing network traffic restrictions between pods cross namespaces (numaresouces and openshift-monitoring)
163+
Entry("numaresouces operator -> prometheus operator", trafficCase{
164+
FromPod: func() *corev1.Pod { return operatorPod },
165+
ToHost: func() string { return prometheusPod.Status.PodIP },
166+
ToPort: "8081",
167+
ShouldAllow: false,
168+
Description: "numaresources operator should NOT access prometheus operator pod",
169+
}),
170+
171+
Entry("prometheus operator -> numaresouces operator", trafficCase{
172+
FromPod: func() *corev1.Pod { return prometheusPod },
173+
ToHost: func() string { return operatorPod.Status.PodIP },
174+
ToPort: "8081", // readinessProbe!
175+
ShouldAllow: false,
176+
Description: "prometheus operator pod should NOT access numaresources operator pod's readiness probe endpoint)",
177+
}),
178+
)
179+
})
180+
181+
// trafficTest returns true if the sourcePod can connect to the given destination IP and port over HTTP.
182+
// It is used to validate network connectivity (e.g., for testing networkPolicy behavior).
183+
func trafficTest(cli *kubernetes.Clientset, ctx context.Context, sourcePod *corev1.Pod, destinationIP, destinationPort string) bool {
184+
GinkgoHelper()
185+
endpoint := net.JoinHostPort(destinationIP, destinationPort)
186+
187+
key := client.ObjectKeyFromObject(sourcePod)
188+
By(fmt.Sprintf("verifying HTTP egress connectivity from pod %q to endpoint %s", key.String(), endpoint))
189+
190+
cmd := []string{"sh", "-c", fmt.Sprintf("curl --connect-timeout 5 http://%s", endpoint)}
191+
192+
stdout, stderr, err := remoteexec.CommandOnPod(ctx, cli, sourcePod, cmd...)
193+
194+
if err != nil {
195+
GinkgoWriter.Printf("curl failed: stdout=%q, stderr=%q, err=%v\n", stdout, stderr, err)
196+
}
197+
return err == nil
198+
}

test/internal/deploy/find.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,32 @@ func FindNUMAResourcesOperatorPod(ctx context.Context, cli client.Client, nrop *
6060

6161
return &podList.Items[0], nil
6262
}
63+
64+
func FindNUMAResourcesSchedulerPod(ctx context.Context, cli client.Client, nrScheduler *nropv1.NUMAResourcesScheduler) (*corev1.Pod, error) {
65+
if nrScheduler.Status.Deployment.Name == "" || nrScheduler.Status.Deployment.Namespace == "" {
66+
return nil, errors.New("scheduler deployment not reported in status")
67+
}
68+
klog.InfoS("NRO scheduler pod", "namespace", nrScheduler.Status.Deployment.Namespace)
69+
70+
sel, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
71+
MatchLabels: map[string]string{
72+
"app": "secondary-scheduler",
73+
},
74+
})
75+
if err != nil {
76+
return nil, err
77+
}
78+
klog.InfoS("NRO scheduler pod", "selector", sel.String())
79+
80+
podList := corev1.PodList{}
81+
err = cli.List(ctx, &podList, &client.ListOptions{Namespace: nrScheduler.Status.Deployment.Namespace, LabelSelector: sel})
82+
if err != nil {
83+
return nil, err
84+
}
85+
86+
if len(podList.Items) < 1 {
87+
return nil, fmt.Errorf("unexpected number of pods found: %d", len(podList.Items))
88+
}
89+
90+
return &podList.Items[0], nil
91+
}

0 commit comments

Comments
 (0)