Skip to content

Commit 7940381

Browse files
committed
Add OTE helper functions in test/library/ote
Add two new OTE helper functions for common test scenarios: 1. WaitForAPIServerRollout: Waits for all API server pods to be recreated after a configuration change. Unlike WaitForAPIServerToStabilizeOnTheSameRevision, this specifically waits for NEW pods to replace old ones. 2. WaitForFeatureGateEnabled: Waits for a specific feature gate to be enabled in the cluster by polling the FeatureGate resource. These functions are needed for testing configuration changes and feature gate enablement in operator e2e tests, particularly for EventTTL configuration tests in cluster-kube-apiserver-operator.
1 parent ab97ebb commit 7940381

File tree

1 file changed

+169
-0
lines changed

1 file changed

+169
-0
lines changed

test/ote/util.go

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
package ote
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"time"
7+
8+
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
9+
"github.com/openshift/library-go/test/library"
10+
corev1 "k8s.io/api/core/v1"
11+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
12+
"k8s.io/apimachinery/pkg/util/wait"
13+
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
14+
)
15+
16+
// WaitForAPIServerRollout waits for all API server pods to be recreated and running
17+
// after a configuration change. Unlike WaitForAPIServerToStabilizeOnTheSameRevision which
18+
// waits for pods to converge on the same revision, this function specifically waits for
19+
// NEW pods (created after the function is called) to replace the old ones.
20+
//
21+
// This is useful when you make a configuration change and need to ensure all pods have
22+
// been recreated with the new configuration, not just that they're on the same revision.
23+
//
24+
// Parameters:
25+
// - t: Logger interface for test output
26+
// - podClient: Pod client interface for the target namespace
27+
// - labelSelector: Label selector to identify API server pods (e.g., "apiserver=true")
28+
// - timeout: Maximum time to wait for rollout to complete
29+
//
30+
// Returns:
31+
// - error if timeout is reached or an error occurs during polling
32+
//
33+
// Note:
34+
// - All existing pods must be replaced by new pods created after this function is called
35+
// - Supports both single-node and multi-node deployments
36+
func WaitForAPIServerRollout(t library.LoggingT, podClient corev1client.PodInterface, labelSelector string, timeout time.Duration) error {
37+
rolloutStartTime := time.Now()
38+
39+
// Get current pods before we start waiting
40+
initialPods, err := podClient.List(context.Background(), metav1.ListOptions{
41+
LabelSelector: labelSelector,
42+
})
43+
if err != nil {
44+
t.Logf("Warning: Could not get initial pods: %v", err)
45+
}
46+
47+
var oldestPodTime time.Time
48+
initialRevision := ""
49+
if initialPods != nil && len(initialPods.Items) > 0 {
50+
oldestPodTime = initialPods.Items[0].CreationTimestamp.Time
51+
for _, pod := range initialPods.Items {
52+
if pod.CreationTimestamp.Time.Before(oldestPodTime) {
53+
oldestPodTime = pod.CreationTimestamp.Time
54+
}
55+
if rev, ok := pod.Labels["revision"]; ok && initialRevision == "" {
56+
initialRevision = rev
57+
}
58+
}
59+
t.Logf("Initial state: %d pods, oldest created at %s, initial revision: %s",
60+
len(initialPods.Items), oldestPodTime.Format(time.RFC3339), initialRevision)
61+
}
62+
63+
attempt := 0
64+
lastPodCount := 0
65+
lastNotRunningCount := 0
66+
67+
return wait.PollUntilContextTimeout(context.Background(), 15*time.Second, timeout, false, func(ctx context.Context) (bool, error) {
68+
attempt++
69+
pods, err := podClient.List(ctx, metav1.ListOptions{
70+
LabelSelector: labelSelector,
71+
})
72+
if err != nil {
73+
t.Logf("[Attempt %d] Error listing pods: %v", attempt, err)
74+
return false, nil
75+
}
76+
77+
if len(pods.Items) == 0 {
78+
t.Logf("[Attempt %d] No pods found yet", attempt)
79+
return false, nil
80+
}
81+
82+
// Count pods and check if we have new pods (created after rollout started)
83+
notRunningCount := 0
84+
newPodsCount := 0
85+
runningNewPodsCount := 0
86+
var notRunningPods []string
87+
var currentRevision string
88+
89+
for _, pod := range pods.Items {
90+
isNewPod := pod.CreationTimestamp.Time.After(rolloutStartTime)
91+
92+
if pod.Status.Phase != corev1.PodRunning {
93+
notRunningCount++
94+
notRunningPods = append(notRunningPods, fmt.Sprintf("%s (%s)", pod.Name, pod.Status.Phase))
95+
}
96+
97+
if isNewPod {
98+
newPodsCount++
99+
if pod.Status.Phase == corev1.PodRunning {
100+
runningNewPodsCount++
101+
}
102+
}
103+
104+
if rev, ok := pod.Labels["revision"]; ok && currentRevision == "" {
105+
currentRevision = rev
106+
}
107+
}
108+
109+
// Success condition: ALL pods must be new (created after rolloutStartTime) and running
110+
expectedPodCount := len(pods.Items)
111+
allPodsNewAndRunning := newPodsCount == expectedPodCount && runningNewPodsCount == expectedPodCount
112+
113+
// Log only when state changes or every 4th attempt (1 minute)
114+
if notRunningCount != lastNotRunningCount || len(pods.Items) != lastPodCount || attempt%4 == 0 {
115+
if notRunningCount > 0 {
116+
t.Logf("[Attempt %d] %d/%d pods running. Not running: %v. New pods: %d/%d running",
117+
attempt, len(pods.Items)-notRunningCount, len(pods.Items), notRunningPods, runningNewPodsCount, newPodsCount)
118+
} else {
119+
t.Logf("[Attempt %d] All %d pods are running. New pods: %d/%d. Revision: %s",
120+
attempt, len(pods.Items), runningNewPodsCount, newPodsCount, currentRevision)
121+
}
122+
lastPodCount = len(pods.Items)
123+
lastNotRunningCount = notRunningCount
124+
}
125+
126+
return allPodsNewAndRunning, nil
127+
})
128+
}
129+
130+
// WaitForFeatureGateEnabled waits for a specific feature gate to be enabled in the cluster.
131+
//
132+
// This function polls the FeatureGate resource until the specified feature is found in the
133+
// enabled list or the timeout is reached.
134+
//
135+
// Parameters:
136+
// - t: Logger interface for test output
137+
// - featureGateClient: FeatureGate client interface
138+
// - featureName: Name of the feature gate to wait for (e.g., "EventTTL")
139+
// - timeout: Maximum time to wait for the feature gate to be enabled
140+
//
141+
// Returns:
142+
// - error if timeout is reached or an error occurs during polling
143+
func WaitForFeatureGateEnabled(t library.LoggingT, featureGateClient configv1client.FeatureGateInterface, featureName string, timeout time.Duration) error {
144+
t.Logf("Waiting for feature gate %s to be enabled (timeout: %v)", featureName, timeout)
145+
attempt := 0
146+
147+
return wait.PollUntilContextTimeout(context.Background(), 10*time.Second, timeout, false, func(ctx context.Context) (bool, error) {
148+
attempt++
149+
fg, err := featureGateClient.Get(ctx, "cluster", metav1.GetOptions{})
150+
if err != nil {
151+
t.Logf("[Attempt %d] Error getting feature gate: %v", attempt, err)
152+
return false, nil
153+
}
154+
155+
for _, fgDetails := range fg.Status.FeatureGates {
156+
for _, enabled := range fgDetails.Enabled {
157+
if string(enabled.Name) == featureName {
158+
t.Logf("[Attempt %d] Feature gate %s is enabled", attempt, featureName)
159+
return true, nil
160+
}
161+
}
162+
}
163+
164+
if attempt%6 == 0 { // Log every minute
165+
t.Logf("[Attempt %d] Feature gate %s not yet enabled, waiting...", attempt, featureName)
166+
}
167+
return false, nil
168+
})
169+
}

0 commit comments

Comments
 (0)