Skip to content

Commit 2d391bf

Browse files
Merge pull request #29428 from tssurya/fix-eventually-timeout-seconds-for-udn-tests
OCPBUGS-48238,OCPBUGS-48449,OCPBUGS-48450: Set the poll timeout and poll interval in eventually directly for the tests
2 parents e356b61 + d7ff8ae commit 2d391bf

File tree

4 files changed

+50
-35
lines changed

4 files changed

+50
-35
lines changed

test/extended/networking/network_segmentation.go

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,25 @@ import (
3838

3939
const openDefaultPortsAnnotation = "k8s.ovn.org/open-default-ports"
4040

41+
// NOTE: We are observing pod creation requests taking more than two minutes to
42+
// reach the CNI for the CNI to do the necessary plumbing. This is causing tests
43+
// to timeout since pod doesn't go into ready state.
44+
// See https://issues.redhat.com/browse/OCPBUGS-48362 for details. We can revisit
45+
// these values when that bug is fixed but given the Kubernetes test default for a
46+
// pod to startup is 5mins: https://github.com/kubernetes/kubernetes/blob/60c4c2b2521fb454ce69dee737e3eb91a25e0535/test/e2e/framework/timeouts.go#L22-L23
47+
// we are not too far from the mark or against test policy
48+
const podReadyPollTimeout = 4 * time.Minute
49+
const podReadyPollInterval = 6 * time.Second
50+
51+
// NOTE: Upstream, we use either the default of gomega which is 1sec polltimeout with 10ms pollinterval OR
52+
// the tests have hardcoded values with 5sec being common for polltimeout and 10ms for pollinterval
53+
// This is being changed to be 10seconds poll timeout to account for infrastructure complexity between
54+
// OpenShift and KIND clusters. Also changing the polling interval to be 1 second so that in both
55+
// Eventually and Consistently blocks we get at least 10 retries (10/1) in good conditions and 5 retries (10/2) in
56+
// bad conditions since connectToServer util has a 2 second timeout.
57+
const serverConnectPollTimeout = 10 * time.Second
58+
const serverConnectPollInterval = 1 * time.Second
59+
4160
var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:UserDefinedPrimaryNetworks]", func() {
4261
// TODO: so far, only the isolation tests actually require this PSA ... Feels wrong to run everything privileged.
4362
// I've tried to have multiple kubeframeworks (from multiple OCs) running (with different project names) but
@@ -263,7 +282,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
263282
By("checking the default network pod can't reach UDN pod on IP " + destIP)
264283
Consistently(func() bool {
265284
return connectToServer(podConfiguration{namespace: defaultPod.Namespace, name: defaultPod.Name}, destIP, port) != nil
266-
}, 5*time.Second).Should(BeTrue())
285+
}, serverConnectPollTimeout, serverConnectPollInterval).Should(BeTrue())
267286
}
268287

269288
defaultIPv4, defaultIPv6, err := podIPsForDefaultNetwork(
@@ -280,11 +299,11 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
280299
By("checking the default network client pod can reach default pod on IP " + destIP)
281300
Eventually(func() bool {
282301
return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, defaultPort) == nil
283-
}).Should(BeTrue())
302+
}, serverConnectPollTimeout, serverConnectPollInterval).Should(BeTrue())
284303
By("checking the UDN pod can't reach the default network pod on IP " + destIP)
285304
Consistently(func() bool {
286305
return connectToServer(udnPodConfig, destIP, defaultPort) != nil
287-
}, 5*time.Second).Should(BeTrue())
306+
}, serverConnectPollTimeout, serverConnectPollInterval).Should(BeTrue())
288307
}
289308

290309
// connectivity check is run every second + 1sec initialDelay
@@ -314,7 +333,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
314333
ping, "-I", "eth0", "-c", "1", "-W", "1", hostIP.IP,
315334
)
316335
return err == nil
317-
}, 4*time.Second).Should(BeFalse())
336+
}, 4*time.Second, 1*time.Second).Should(BeFalse())
318337
}
319338

320339
By("asserting UDN pod can reach the kapi service in the default network")
@@ -331,7 +350,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
331350
"--insecure",
332351
"https://kubernetes.default/healthz")
333352
return err == nil
334-
}, 5*time.Second).Should(BeTrue())
353+
}, 5*time.Second, 1*time.Second).Should(BeTrue())
335354

336355
By("asserting UDN pod can't reach default services via default network interface")
337356
// route setup is already done, get kapi IPs
@@ -353,7 +372,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
353372
"--insecure",
354373
fmt.Sprintf("https://%s/healthz", kapiIP))
355374
return err != nil
356-
}, 5*time.Second).Should(BeTrue())
375+
}, 5*time.Second, 1*time.Second).Should(BeTrue())
357376
}
358377
},
359378
Entry(
@@ -662,7 +681,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
662681
_ = nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Delete(ctx, testUdnName, metav1.DeleteOptions{})
663682
_, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Get(ctx, testUdnName, metav1.GetOptions{})
664683
return err
665-
}).ShouldNot(HaveOccurred(),
684+
}, udnInUseDeleteTimeout, deleteNetworkInterval).ShouldNot(HaveOccurred(),
666685
"should fail to delete UserDefinedNetwork associated NetworkAttachmentDefinition when used")
667686

668687
By("verify UserDefinedNetwork status reports consuming pod")
@@ -907,7 +926,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
907926
_ = nadClient.NetworkAttachmentDefinitions(inUseNetTestTenantNamespace).Delete(ctx, testClusterUdnName, metav1.DeleteOptions{})
908927
_, err := nadClient.NetworkAttachmentDefinitions(inUseNetTestTenantNamespace).Get(ctx, testClusterUdnName, metav1.GetOptions{})
909928
return err
910-
}).ShouldNot(HaveOccurred(),
929+
}, udnInUseDeleteTimeout, deleteNetworkInterval).ShouldNot(HaveOccurred(),
911930
"should fail to delete UserDefinedNetwork associated NetworkAttachmentDefinition when used")
912931

913932
By("verify CR status reports consuming pod")
@@ -969,6 +988,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
969988
By("create primary Cluster UDN CR")
970989
cudnName := randomNetworkMetaName()
971990
cleanup, err := createManifest(f.Namespace.Name, newPrimaryClusterUDNManifest(cudnName, testTenantNamespaces...))
991+
Expect(err).NotTo(HaveOccurred())
972992
DeferCleanup(func() {
973993
cleanup()
974994
_, err := e2ekubectl.RunKubectl("", "delete", "clusteruserdefinednetwork", cudnName, "--wait", fmt.Sprintf("--timeout=%ds", 60))
@@ -1060,7 +1080,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
10601080
By("checking the default network pod can't reach UDN pod on IP " + destIP)
10611081
Consistently(func() bool {
10621082
return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, port) != nil
1063-
}, 5*time.Second).Should(BeTrue())
1083+
}, serverConnectPollTimeout, serverConnectPollInterval).Should(BeTrue())
10641084
}
10651085

10661086
By("Open UDN pod port")
@@ -1078,7 +1098,7 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
10781098
By("checking the default network pod can reach UDN pod on IP " + destIP)
10791099
Eventually(func() bool {
10801100
return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, port) == nil
1081-
}, 5*time.Second).Should(BeTrue())
1101+
}, serverConnectPollTimeout, serverConnectPollInterval).Should(BeTrue())
10821102
}
10831103

10841104
By("Update UDN pod port with the wrong syntax")
@@ -1097,10 +1117,11 @@ var _ = Describe("[sig-network][OCPFeatureGate:NetworkSegmentation][Feature:User
10971117
By("checking the default network pod can't reach UDN pod on IP " + destIP)
10981118
Eventually(func() bool {
10991119
return connectToServer(podConfiguration{namespace: defaultClientPod.Namespace, name: defaultClientPod.Name}, destIP, port) != nil
1100-
}, 5*time.Second).Should(BeTrue())
1120+
}, serverConnectPollTimeout, serverConnectPollInterval).Should(BeTrue())
11011121
}
11021122
By("Verify syntax error is reported via event")
11031123
events, err := cs.CoreV1().Events(udnPod.Namespace).List(context.Background(), metav1.ListOptions{})
1124+
Expect(err).NotTo(HaveOccurred())
11041125
found := false
11051126
for _, event := range events.Items {
11061127
if event.Reason == "ErrorUpdatingResource" && strings.Contains(event.Message, "invalid protocol ppp") {
@@ -1586,7 +1607,7 @@ func runUDNPod(cs clientset.Interface, namespace string, podConfig podConfigurat
15861607
return v1.PodFailed
15871608
}
15881609
return updatedPod.Status.Phase
1589-
}, 2*time.Minute, 6*time.Second).Should(Equal(v1.PodRunning))
1610+
}, podReadyPollTimeout, podReadyPollInterval).Should(Equal(v1.PodRunning))
15901611
return updatedPod
15911612
}
15921613

test/extended/prometheus/collection_profiles.go

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ const (
3333
operatorName = "cluster-monitoring-operator"
3434
operatorNamespaceName = "openshift-monitoring"
3535
operatorConfigurationName = "cluster-monitoring-config"
36+
pollTimeout = 15 * time.Minute
37+
pollInterval = 5 * time.Second
3638
)
3739

3840
var (
@@ -58,9 +60,6 @@ type runner struct {
5860
var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfiles] The collection profiles feature-set", g.Ordered, func() {
5961
defer g.GinkgoRecover()
6062

61-
o.SetDefaultEventuallyTimeout(15 * time.Minute)
62-
o.SetDefaultEventuallyPollingInterval(5 * time.Second)
63-
6463
r := &runner{}
6564

6665
g.BeforeAll(func() {
@@ -92,7 +91,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
9291
}
9392

9493
return nil
95-
}).Should(o.BeNil())
94+
}, pollTimeout, pollInterval).Should(o.BeNil())
9695
r.originalOperatorConfiguration = operatorConfiguration
9796
})
9897

@@ -126,7 +125,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
126125
}
127126

128127
return nil
129-
}).Should(o.BeNil())
128+
}, pollTimeout, pollInterval).Should(o.BeNil())
130129
})
131130

132131
g.It("should expose default metrics", func() {
@@ -142,7 +141,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
142141
}
143142

144143
return nil
145-
}).Should(o.BeNil())
144+
}, pollTimeout, pollInterval).Should(o.BeNil())
146145
})
147146
})
148147

@@ -163,7 +162,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
163162
}
164163

165164
return nil
166-
}).Should(o.BeNil())
165+
}, pollTimeout, pollInterval).Should(o.BeNil())
167166
}
168167
})
169168
g.It("should have at least one implementation for each collection profile", func() {
@@ -181,7 +180,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
181180
}
182181

183182
return nil
184-
}).Should(o.BeNil())
183+
}, pollTimeout, pollInterval).Should(o.BeNil())
185184
}
186185
})
187186
g.It("should revert to default collection profile when an empty collection profile value is specified", func() {
@@ -198,7 +197,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
198197
}
199198

200199
return nil
201-
}).Should(o.BeNil())
200+
}, pollTimeout, pollInterval).Should(o.BeNil())
202201
})
203202
})
204203

@@ -218,7 +217,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
218217
}
219218

220219
return nil
221-
}).Should(o.BeNil())
220+
}, pollTimeout, pollInterval).Should(o.BeNil())
222221
})
223222

224223
g.It("should hide default metrics", func() {
@@ -240,7 +239,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
240239
kubeStateMetricsMonitor = monitors.Items[0]
241240

242241
return nil
243-
}).Should(o.BeNil())
242+
}, pollTimeout, pollInterval).Should(o.BeNil())
244243

245244
var kubeStateMetricsMainMetrics []string
246245
kubeStateMetricsMonitorSpec := kubeStateMetricsMonitor.Spec
@@ -297,7 +296,7 @@ var _ = g.Describe("[sig-instrumentation][OCPFeatureGate:MetricsCollectionProfil
297296
}
298297

299298
return nil
300-
}).Should(o.BeNil())
299+
}, pollTimeout, pollInterval).Should(o.BeNil())
301300
})
302301
})
303302
})

test/extended/storage/driver_configuration.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import (
2626
const (
2727
projectName = "csi-driver-configuration"
2828
providerName = "csi.vsphere.vmware.com"
29+
pollTimeout = 5 * time.Minute
30+
pollInterval = 5 * time.Second
2931
)
3032

3133
// This is [Serial] because it modifies ClusterCSIDriver.
@@ -37,9 +39,6 @@ var _ = g.Describe("[sig-storage][FeatureGate:VSphereDriverConfiguration][Serial
3739
originalDriverConfigSpec *opv1.CSIDriverConfigSpec
3840
)
3941

40-
o.SetDefaultEventuallyTimeout(5 * time.Minute)
41-
o.SetDefaultEventuallyPollingInterval(5 * time.Second)
42-
4342
g.BeforeEach(func() {
4443
if !framework.ProviderIs("vsphere") {
4544
g.Skip("this test is only expected to work with vSphere clusters")
@@ -143,7 +142,7 @@ var _ = g.Describe("[sig-storage][FeatureGate:VSphereDriverConfiguration][Serial
143142
setClusterCSIDriverSnapshotOptions(ctx, oc, t.clusterCSIDriverOptions)
144143
o.Eventually(func() error {
145144
return loadAndCheckCloudConf(ctx, oc, "Snapshot", t.cloudConfigOptions, t.clusterCSIDriverOptions)
146-
}).Should(o.Succeed())
145+
}, pollTimeout, pollInterval).Should(o.Succeed())
147146

148147
validateSnapshotCreation(ctx, oc, t.successfulSnapshotsCreated)
149148
})
@@ -231,7 +230,7 @@ func validateSnapshotCreation(ctx context.Context, oc *exutil.CLI, successfulSna
231230
pvc, err := oc.AdminKubeClient().CoreV1().PersistentVolumeClaims(oc.Namespace()).Get(ctx, "test-pvc", metav1.GetOptions{})
232231
o.Expect(err).NotTo(o.HaveOccurred())
233232
return pvc.Status.Phase
234-
}).Should(o.Equal(v1.ClaimBound))
233+
}, pollTimeout, pollInterval).Should(o.Equal(v1.ClaimBound))
235234

236235
var wg sync.WaitGroup
237236
var snapshotsCreated = make([]string, 0, successfulSnapshotsCreated)
@@ -269,7 +268,7 @@ func validateSnapshotCreation(ctx context.Context, oc *exutil.CLI, successfulSna
269268
}
270269
e2e.Logf("Snapshots ready: %d/%d", snapshotsReady, successfulSnapshotsCreated)
271270
return snapshotsReady
272-
}).Should(o.Equal(successfulSnapshotsCreated), "not all snapshots are ready")
271+
}, pollTimeout, pollInterval).Should(o.Equal(successfulSnapshotsCreated), "not all snapshots are ready")
273272

274273
// Next snapshot creation should be over the set limit and fail.
275274
failedSnapshotName := "test-snapshot-failed"
@@ -289,7 +288,7 @@ func validateSnapshotCreation(ctx context.Context, oc *exutil.CLI, successfulSna
289288
}
290289
e2e.Logf("Error validation successful - snapshot: %s readyToUse: %t, error message: %s", failedSnapshotName, ready, errMsg)
291290
return strings.Contains(errMsg, "reaches the configured maximum") && !ready
292-
}).Should(o.BeTrue(), "snapshot creation should fail")
291+
}, pollTimeout, pollInterval).Should(o.BeTrue(), "snapshot creation should fail")
293292
}
294293

295294
func createTestPod(ctx context.Context, oc *exutil.CLI, pvcName string, namespace string) (*v1.Pod, error) {

test/extended/storage/s3_configuration.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package storage
22

33
import (
44
"context"
5-
"time"
65

76
g "github.com/onsi/ginkgo/v2"
87
o "github.com/onsi/gomega"
@@ -26,9 +25,6 @@ var _ = g.Describe("[sig-imageregistry][OCPFeatureGate:ChunkSizeMiB][Serial][api
2625
originalConfigSpec *imageregistryv1.ImageRegistryConfigStorageS3
2726
)
2827

29-
o.SetDefaultEventuallyTimeout(5 * time.Minute)
30-
o.SetDefaultEventuallyPollingInterval(5 * time.Second)
31-
3228
g.BeforeEach(func() {
3329

3430
skipIfNotS3Storage(oc)

0 commit comments

Comments
 (0)