Skip to content

Commit a323326

Browse files
authored
Merge pull request #587 from karlkfi/karl-stress-retry
chore: add stress test that validates retries
2 parents 0cb95ee + 9dcbe66 commit a323326

File tree

5 files changed

+232
-40
lines changed

5 files changed

+232
-40
lines changed

test/e2e/e2eutil/common.go

Lines changed: 65 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/google/go-cmp/cmp/cmpopts"
1515
"github.com/onsi/ginkgo/v2"
1616
"github.com/onsi/gomega"
17+
"github.com/onsi/gomega/gstruct"
1718
v1 "k8s.io/api/core/v1"
1819
apierrors "k8s.io/apimachinery/pkg/api/errors"
1920
"k8s.io/apimachinery/pkg/api/meta"
@@ -33,6 +34,8 @@ import (
3334
"sigs.k8s.io/controller-runtime/pkg/client"
3435
)
3536

37+
// TestIDLabel is the label key used to tag objects created by a test.
// The stress tests set its value to the inventory ID of the test run.
const TestIDLabel = "test-id"
38+
3639
func WithReplicas(obj *unstructured.Unstructured, replicas int) *unstructured.Unstructured {
3740
err := unstructured.SetNestedField(obj.Object, int64(replicas), "spec", "replicas")
3841
gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -270,9 +273,7 @@ func Run(ch <-chan event.Event) error {
270273
return err
271274
}
272275

273-
func RunWithNoErr(ch <-chan event.Event) {
274-
RunCollectNoErr(ch)
275-
}
276+
// RunWithNoErr is an alias for RunCollectNoErr: it accepts the same arguments
// and returns the same collected events, which callers are free to ignore.
var RunWithNoErr = RunCollectNoErr
276277

277278
func RunCollect(ch <-chan event.Event) []event.Event {
278279
var events []event.Event
@@ -282,14 +283,71 @@ func RunCollect(ch <-chan event.Event) []event.Event {
282283
return events
283284
}
284285

285-
func RunCollectNoErr(ch <-chan event.Event) []event.Event {
286-
events := RunCollect(ch)
287-
for _, e := range events {
288-
gomega.Expect(e.Type).NotTo(gomega.Equal(event.ErrorType))
286+
func RunCollectNoErr(ch <-chan event.Event, callerSkip ...int) []event.Event {
287+
skip := 0
288+
if len(callerSkip) > 0 {
289+
skip = callerSkip[0]
289290
}
291+
292+
events := RunCollect(ch)
293+
ExpectNoEventErrors(events, skip+1)
294+
ExpectNoReconcileTimeouts(events, skip+1)
290295
return events
291296
}
292297

298+
func ExpectNoEventErrors(events []event.Event, callerSkip ...int) {
299+
skip := 0
300+
if len(callerSkip) > 0 {
301+
skip = callerSkip[0]
302+
}
303+
304+
gomega.Expect(events).WithOffset(skip + 1).NotTo(
305+
gomega.ContainElement(gstruct.MatchFields(gstruct.IgnoreExtras,
306+
gstruct.Fields{
307+
"Type": gomega.Equal(event.ErrorType),
308+
})))
309+
gomega.Expect(events).WithOffset(skip + 1).NotTo(
310+
gomega.ContainElement(gstruct.MatchFields(gstruct.IgnoreExtras,
311+
gstruct.Fields{
312+
"Type": gomega.Equal(event.ApplyType),
313+
"ApplyEvent": gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
314+
"Status": gomega.Equal(event.ApplyFailed),
315+
}),
316+
})))
317+
gomega.Expect(events).WithOffset(skip + 1).NotTo(
318+
gomega.ContainElement(gstruct.MatchFields(gstruct.IgnoreExtras,
319+
gstruct.Fields{
320+
"Type": gomega.Equal(event.PruneType),
321+
"PruneEvent": gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
322+
"Status": gomega.Equal(event.PruneFailed),
323+
}),
324+
})))
325+
gomega.Expect(events).WithOffset(skip + 1).NotTo(
326+
gomega.ContainElement(gstruct.MatchFields(gstruct.IgnoreExtras,
327+
gstruct.Fields{
328+
"Type": gomega.Equal(event.DeleteType),
329+
"DeleteEvent": gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
330+
"Status": gomega.Equal(event.DeleteFailed),
331+
}),
332+
})))
333+
}
334+
335+
func ExpectNoReconcileTimeouts(events []event.Event, callerSkip ...int) {
336+
skip := 0
337+
if len(callerSkip) > 0 {
338+
skip = callerSkip[0]
339+
}
340+
341+
gomega.Expect(events).WithOffset(skip + 1).NotTo(
342+
gomega.ContainElement(gstruct.MatchFields(gstruct.IgnoreExtras,
343+
gstruct.Fields{
344+
"Type": gomega.Equal(event.WaitType),
345+
"WaitEvent": gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
346+
"Status": gomega.Equal(event.ReconcileTimeout),
347+
}),
348+
})))
349+
}
350+
293351
func ManifestToUnstructured(manifest []byte) *unstructured.Unstructured {
294352
u := make(map[string]interface{})
295353
err := yaml.Unmarshal(manifest, &u)

test/stress/stress_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ var _ = Describe("Stress", func() {
101101
thousandDeploymentsTest(ctx, c, invConfig, inventoryName, namespace.GetName())
102102
})
103103

104+
It("ThousandDeploymentsRetry", func() {
105+
thousandDeploymentsRetryTest(ctx, c, invConfig, inventoryName, namespace.GetName())
106+
})
107+
104108
It("ThousandNamespaces", func() {
105109
thousandNamespacesTest(ctx, c, invConfig, inventoryName, namespace.GetName())
106110
})
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
// Copyright 2022 The Kubernetes Authors.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package stress
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"time"
10+
11+
. "github.com/onsi/ginkgo/v2"
12+
. "github.com/onsi/gomega"
13+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
14+
"k8s.io/klog/v2"
15+
"sigs.k8s.io/cli-utils/pkg/apply"
16+
"sigs.k8s.io/cli-utils/pkg/apply/event"
17+
"sigs.k8s.io/cli-utils/pkg/common"
18+
"sigs.k8s.io/cli-utils/pkg/inventory"
19+
"sigs.k8s.io/cli-utils/test/e2e/e2eutil"
20+
"sigs.k8s.io/cli-utils/test/e2e/invconfig"
21+
"sigs.k8s.io/controller-runtime/pkg/client"
22+
)
23+
24+
// thousandDeploymentsRetryTest tests one pre-existing namespace with 1,000
25+
// Deployments in it. The wait timeout is set too short to confirm
26+
// reconciliation, but the apply/destroy is retried until success.
27+
//
28+
// The Deployments themselves are easy to get status on, but with the retrieval
29+
// of generated resource status (ReplicaSets & Pods), this becomes expensive.
30+
func thousandDeploymentsRetryTest(ctx context.Context, c client.Client, invConfig invconfig.InventoryConfig, inventoryName, namespaceName string) {
31+
By("Apply LOTS of resources")
32+
applier := invConfig.ApplierFactoryFunc()
33+
inventoryID := fmt.Sprintf("%s-%s", inventoryName, namespaceName)
34+
35+
inventoryInfo := invconfig.CreateInventoryInfo(invConfig, inventoryName, namespaceName, inventoryID)
36+
37+
resources := []*unstructured.Unstructured{}
38+
39+
deploymentObjTemplate := e2eutil.ManifestToUnstructured([]byte(deploymentYaml))
40+
deploymentObjTemplate.SetLabels(map[string]string{e2eutil.TestIDLabel: inventoryID})
41+
42+
objectCount := 1000
43+
44+
for i := 1; i <= objectCount; i++ {
45+
deploymentObj := deploymentObjTemplate.DeepCopy()
46+
deploymentObj.SetNamespace(namespaceName)
47+
48+
// change name & selector labels to avoid overlap between deployments
49+
name := fmt.Sprintf("nginx-%d", i)
50+
deploymentObj.SetName(name)
51+
err := unstructured.SetNestedField(deploymentObj.Object, name, "spec", "selector", "matchLabels", "app")
52+
Expect(err).ToNot(HaveOccurred())
53+
err = unstructured.SetNestedField(deploymentObj.Object, name, "spec", "template", "metadata", "labels", "app")
54+
Expect(err).ToNot(HaveOccurred())
55+
56+
resources = append(resources, deploymentObj)
57+
}
58+
59+
defer func() {
60+
By("Cleanup Deployments")
61+
e2eutil.DeleteAllUnstructuredIfExists(ctx, c, deploymentObjTemplate)
62+
}()
63+
64+
startTotal := time.Now()
65+
66+
var applierEvents []event.Event
67+
68+
maxAttempts := 15
69+
reconcileTimeout := 2 * time.Minute
70+
71+
for attempt := 1; attempt <= maxAttempts; attempt++ {
72+
start := time.Now()
73+
74+
applierEvents = e2eutil.RunCollect(applier.Run(ctx, inventoryInfo, resources, apply.ApplierOptions{
75+
// SSA reduces GET+PATCH to just PATCH, which is faster
76+
ServerSideOptions: common.ServerSideOptions{
77+
ServerSideApply: true,
78+
ForceConflicts: true,
79+
FieldManager: "cli-utils.kubernetes.io",
80+
},
81+
ReconcileTimeout: reconcileTimeout,
82+
EmitStatusEvents: false,
83+
}))
84+
85+
duration := time.Since(start)
86+
klog.Infof("Applier.Run execution time (attempt: %d): %v", attempt, duration)
87+
88+
e2eutil.ExpectNoEventErrors(applierEvents)
89+
90+
// Retry if ReconcileTimeout
91+
retry := false
92+
for _, e := range applierEvents {
93+
if e.Type == event.WaitType && e.WaitEvent.Status == event.ReconcileTimeout {
94+
retry = true
95+
}
96+
}
97+
if !retry {
98+
break
99+
}
100+
}
101+
102+
durationTotal := time.Since(startTotal)
103+
klog.Infof("Applier.Run total execution time (attempts: %d): %v", maxAttempts, durationTotal)
104+
105+
e2eutil.ExpectNoReconcileTimeouts(applierEvents)
106+
107+
By("Verify inventory created")
108+
invConfig.InvSizeVerifyFunc(ctx, c, inventoryName, namespaceName, inventoryID, len(resources), len(resources))
109+
110+
By(fmt.Sprintf("Verify %d Deployments created", objectCount))
111+
e2eutil.AssertUnstructuredCount(ctx, c, deploymentObjTemplate, objectCount)
112+
113+
By("Destroy LOTS of resources")
114+
destroyer := invConfig.DestroyerFactoryFunc()
115+
116+
startTotal = time.Now()
117+
118+
var destroyerEvents []event.Event
119+
120+
for attempt := 1; attempt <= maxAttempts; attempt++ {
121+
start := time.Now()
122+
123+
destroyerEvents = e2eutil.RunCollect(destroyer.Run(ctx, inventoryInfo, apply.DestroyerOptions{
124+
InventoryPolicy: inventory.PolicyAdoptIfNoInventory,
125+
DeleteTimeout: reconcileTimeout,
126+
}))
127+
128+
duration := time.Since(start)
129+
klog.Infof("Destroyer.Run execution time (attempt: %d): %v", attempt, duration)
130+
131+
e2eutil.ExpectNoEventErrors(destroyerEvents)
132+
133+
// Retry if ReconcileTimeout
134+
retry := false
135+
for _, e := range applierEvents {
136+
if e.Type == event.WaitType && e.WaitEvent.Status == event.ReconcileTimeout {
137+
retry = true
138+
}
139+
}
140+
if !retry {
141+
break
142+
}
143+
}
144+
145+
durationTotal = time.Since(startTotal)
146+
klog.Infof("Destroyer.Run total execution time (attempts: %d): %v", maxAttempts, durationTotal)
147+
148+
e2eutil.ExpectNoReconcileTimeouts(applierEvents)
149+
150+
By("Verify inventory deleted")
151+
invConfig.InvNotExistsFunc(ctx, c, inventoryName, namespaceName, inventoryID)
152+
153+
By(fmt.Sprintf("Verify %d Deployments deleted", objectCount))
154+
e2eutil.AssertUnstructuredCount(ctx, c, deploymentObjTemplate, 0)
155+
}

test/stress/thousand_deployments_test.go

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
1414
"k8s.io/klog/v2"
1515
"sigs.k8s.io/cli-utils/pkg/apply"
16-
"sigs.k8s.io/cli-utils/pkg/apply/event"
1716
"sigs.k8s.io/cli-utils/pkg/common"
1817
"sigs.k8s.io/cli-utils/pkg/inventory"
1918
"sigs.k8s.io/cli-utils/test/e2e/e2eutil"
@@ -35,11 +34,8 @@ func thousandDeploymentsTest(ctx context.Context, c client.Client, invConfig inv
3534

3635
resources := []*unstructured.Unstructured{}
3736

38-
labelKey := "created-for"
39-
labelValue := "stress-test"
40-
4137
deploymentObjTemplate := e2eutil.ManifestToUnstructured([]byte(deploymentYaml))
42-
deploymentObjTemplate.SetLabels(map[string]string{labelKey: labelValue})
38+
deploymentObjTemplate.SetLabels(map[string]string{e2eutil.TestIDLabel: inventoryID})
4339

4440
objectCount := 1000
4541

@@ -79,17 +75,8 @@ func thousandDeploymentsTest(ctx context.Context, c client.Client, invConfig inv
7975
duration := time.Since(start)
8076
klog.Infof("Applier.Run execution time: %v", duration)
8177

82-
for _, e := range applierEvents {
83-
Expect(e.ErrorEvent.Err).To(BeNil())
84-
}
85-
for _, e := range applierEvents {
86-
Expect(e.ApplyEvent.Error).To(BeNil(), "ApplyEvent: %v", e.ApplyEvent)
87-
}
88-
for _, e := range applierEvents {
89-
if e.Type == event.WaitType {
90-
Expect(e.WaitEvent.Status).To(BeElementOf(event.ReconcilePending, event.ReconcileSuccessful), "WaitEvent: %v", e.WaitEvent)
91-
}
92-
}
78+
e2eutil.ExpectNoEventErrors(applierEvents)
79+
e2eutil.ExpectNoReconcileTimeouts(applierEvents)
9380

9481
By("Verify inventory created")
9582
invConfig.InvSizeVerifyFunc(ctx, c, inventoryName, namespaceName, inventoryID, len(resources), len(resources))
@@ -110,17 +97,8 @@ func thousandDeploymentsTest(ctx context.Context, c client.Client, invConfig inv
11097
duration = time.Since(start)
11198
klog.Infof("Destroyer.Run execution time: %v", duration)
11299

113-
for _, e := range destroyerEvents {
114-
Expect(e.ErrorEvent.Err).To(BeNil())
115-
}
116-
for _, e := range destroyerEvents {
117-
Expect(e.PruneEvent.Error).To(BeNil(), "PruneEvent: %v", e.PruneEvent)
118-
}
119-
for _, e := range destroyerEvents {
120-
if e.Type == event.WaitType {
121-
Expect(e.WaitEvent.Status).To(BeElementOf(event.ReconcilePending, event.ReconcileSuccessful), "WaitEvent: %v", e.WaitEvent)
122-
}
123-
}
100+
e2eutil.ExpectNoEventErrors(destroyerEvents)
101+
e2eutil.ExpectNoReconcileTimeouts(destroyerEvents)
124102

125103
By("Verify inventory deleted")
126104
invConfig.InvNotExistsFunc(ctx, c, inventoryName, namespaceName, inventoryID)

test/stress/thousand_namespaces_test.go

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,14 @@ func thousandNamespacesTest(ctx context.Context, c client.Client, invConfig invc
4141

4242
resources := []*unstructured.Unstructured{crdObj}
4343

44-
labelKey := "created-for"
45-
labelValue := "stress-test"
46-
4744
namespaceObjTemplate := e2eutil.ManifestToUnstructured([]byte(namespaceYaml))
48-
namespaceObjTemplate.SetLabels(map[string]string{labelKey: labelValue})
45+
namespaceObjTemplate.SetLabels(map[string]string{e2eutil.TestIDLabel: inventoryID})
4946

5047
configMapObjTemplate := e2eutil.ManifestToUnstructured([]byte(configMapYaml))
51-
configMapObjTemplate.SetLabels(map[string]string{labelKey: labelValue})
48+
configMapObjTemplate.SetLabels(map[string]string{e2eutil.TestIDLabel: inventoryID})
5249

5350
cronTabObjTemplate := e2eutil.ManifestToUnstructured([]byte(cronTabYaml))
54-
cronTabObjTemplate.SetLabels(map[string]string{labelKey: labelValue})
51+
cronTabObjTemplate.SetLabels(map[string]string{e2eutil.TestIDLabel: inventoryID})
5552

5653
objectCount := 1000
5754

0 commit comments

Comments
 (0)