Skip to content

Commit 52d5a5c

Browse files
committed
Add chaos scenario
1 parent 3ba2891 commit 52d5a5c

File tree

8 files changed

+170
-6
lines changed

8 files changed

+170
-6
lines changed

.run/experiment (kind).run.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<configuration default="false" name="experiment (kind)" type="GoApplicationRunConfiguration" factoryName="Go Application">
33
<module name="kubernetes-controller-sharding" />
44
<working_directory value="$PROJECT_DIR$/webhosting-operator" />
5-
<parameters value="reconcile" />
5+
<parameters value="basic" />
66
<envs>
77
<env name="KUBECONFIG" value="$PROJECT_DIR$/hack/kind_kubeconfig.yaml" />
88
</envs>
@@ -12,4 +12,4 @@
1212
<filePath value="$PROJECT_DIR$/webhosting-operator/cmd/experiment/main.go" />
1313
<method v="2" />
1414
</configuration>
15-
</component>
15+
</component>

docs/evaluation.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,10 @@ Usage:
7575
experiment [command]
7676
7777
Available Scenarios
78-
basic Basic load test scenario (15m) that creates roughly 9k websites
79-
scale-out Scenario for testing scale-out with high churn rate
78+
Available Scenarios
79+
basic Basic load test, create 9k websites in 15 minutes
80+
chaos Create 4.5k websites over 15 minutes and terminate a random shard every 5 minutes
81+
scale-out Measure scale-out properties with a high churn rate
8082
...
8183
```
8284

webhosting-operator/config/experiment/base/rbac.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ rules:
6161
- list
6262
- watch
6363
- deletecollection
64+
- apiGroups:
65+
- ""
66+
resources:
67+
- pods
68+
verbs:
69+
- get
70+
- list
71+
- watch
72+
- delete
6473
---
6574
apiVersion: rbac.authorization.k8s.io/v1
6675
kind: ClusterRoleBinding
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
apiVersion: kustomize.config.k8s.io/v1beta1
2+
kind: Kustomization
3+
4+
resources:
5+
- ../base
6+
7+
patches:
8+
- target:
9+
kind: Job
10+
name: experiment
11+
patch: |
12+
- op: add
13+
path: /spec/template/spec/containers/0/args/-
14+
value: chaos

webhosting-operator/pkg/experiment/scenario/all/all.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@ package all
1919

2020
import (
2121
_ "github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/experiment/scenario/basic"
22+
_ "github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/experiment/scenario/chaos"
2223
_ "github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/experiment/scenario/scale-out"
2324
)

webhosting-operator/pkg/experiment/scenario/basic/basic.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ type scenario struct {
4747
}
4848

4949
func (s *scenario) Description() string {
50-
return "Basic load test scenario (15m) that creates roughly 9k websites"
50+
return "Basic load test, create 9k websites in 15 minutes"
5151
}
5252

5353
func (s *scenario) LongDescription() string {
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*
2+
Copyright 2025 Tim Ebert.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package chaos
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"time"
23+
24+
"golang.org/x/time/rate"
25+
corev1 "k8s.io/api/core/v1"
26+
"sigs.k8s.io/controller-runtime/pkg/client"
27+
logf "sigs.k8s.io/controller-runtime/pkg/log"
28+
29+
webhostingv1alpha1 "github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/apis/webhosting/v1alpha1"
30+
"github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/experiment"
31+
"github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/experiment/generator"
32+
"github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/experiment/scenario/base"
33+
"github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/pkg/utils"
34+
)
35+
36+
// ScenarioName is the name of the chaos scenario. It is set as the ScenarioName of the
// base.Scenario in init and used in the text returned by LongDescription.
const ScenarioName = "chaos"
37+
38+
func init() {
39+
s := &scenario{}
40+
s.Scenario = &base.Scenario{
41+
ScenarioName: ScenarioName,
42+
Delegate: s,
43+
}
44+
45+
experiment.RegisterScenario(s)
46+
}
47+
48+
type scenario struct {
49+
*base.Scenario
50+
}
51+
52+
func (s *scenario) Description() string {
53+
return "Create 4.5k websites over 15 minutes and terminate a random shard every 5 minutes"
54+
}
55+
56+
func (s *scenario) LongDescription() string {
57+
return `The ` + ScenarioName + ` scenario generates load and chaos for the webhosting-operator:
58+
- website creation: 4500 over 15m
59+
- website spec changes: 0.5/m per object, max 37.5/s
60+
- shard termination (pod deletion): 1/m
61+
`
62+
}
63+
64+
func (s *scenario) Prepare(ctx context.Context) error {
65+
s.Log.Info("Preparing themes")
66+
if err := generator.CreateThemes(ctx, s.Client, 50, generator.WithLabels(s.Labels), generator.WithOwnerReference(s.OwnerRef)); err != nil {
67+
return err
68+
}
69+
70+
s.Log.Info("Preparing projects")
71+
if err := generator.CreateProjects(ctx, s.Client, 20, generator.WithLabels(s.Labels), generator.WithOwnerReference(s.OwnerRef)); err != nil {
72+
return err
73+
}
74+
75+
return nil
76+
}
77+
78+
func (s *scenario) Run(ctx context.Context) error {
79+
// website-generator: creates about 4500 websites over 15 minutes
80+
if err := (&generator.Every{
81+
Name: "website-generator",
82+
Do: func(ctx context.Context, c client.Client) error {
83+
return generator.CreateWebsite(ctx, c, generator.WithLabels(s.Labels))
84+
},
85+
Rate: rate.Limit(5),
86+
}).AddToManager(s.Manager); err != nil {
87+
return fmt.Errorf("error adding website-generator: %w", err)
88+
}
89+
90+
// trigger individual spec changes for website every other minute
91+
// => peaks at about 37.5 spec changes per second at the end of the experiment
92+
// (triggers roughly double the reconciliation rate in website controller because of deployment watches)
93+
if err := (&generator.ForEach[*webhostingv1alpha1.Website]{
94+
Name: "website-mutator",
95+
Do: func(ctx context.Context, c client.Client, obj *webhostingv1alpha1.Website) error {
96+
return client.IgnoreNotFound(generator.MutateWebsite(ctx, c, obj, s.Labels))
97+
},
98+
Every: 2 * time.Minute,
99+
}).AddToManager(s.Manager); err != nil {
100+
return fmt.Errorf("error adding website-mutator: %w", err)
101+
}
102+
103+
// Terminate a random shard every 5 minutes
104+
if err := (&generator.Every{
105+
Name: "shard-terminator",
106+
Do: terminateRandomShard,
107+
Rate: rate.Every(5 * time.Minute),
108+
}).AddToManager(s.Manager); err != nil {
109+
return fmt.Errorf("error adding shard-terminator: %w", err)
110+
}
111+
112+
return s.Wait(ctx, 15*time.Minute)
113+
}
114+
115+
func terminateRandomShard(ctx context.Context, c client.Client) error {
116+
log := logf.FromContext(ctx)
117+
118+
podList := &corev1.PodList{}
119+
if err := c.List(ctx, podList,
120+
client.InNamespace(webhostingv1alpha1.NamespaceSystem),
121+
client.MatchingLabels{"app.kubernetes.io/name": webhostingv1alpha1.WebhostingOperatorName},
122+
); err != nil {
123+
return err
124+
}
125+
126+
if len(podList.Items) == 0 {
127+
log.Info("No shards found, skipping termination")
128+
return nil
129+
}
130+
131+
pod := utils.PickRandom(podList.Items)
132+
if err := c.Delete(ctx, &pod); err != nil {
133+
return err
134+
}
135+
136+
log.Info("Terminated shard", "pod", client.ObjectKeyFromObject(&pod))
137+
return nil
138+
}

webhosting-operator/pkg/experiment/scenario/scale-out/scale_out.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ type scenario struct {
4747
}
4848

4949
func (s *scenario) Description() string {
50-
return "Scenario for testing scale-out with high churn rate"
50+
return "Measure scale-out properties with a high churn rate"
5151
}
5252

5353
func (s *scenario) LongDescription() string {

0 commit comments

Comments
 (0)