Skip to content

Commit deb52e7

Browse files
committed
Addressing Leonid's comments
Signed-off-by: Lazar Cvetković <l.cvetkovic.997@gmail.com>
1 parent 5381a8d commit deb52e7

File tree

3 files changed

+45
-51
lines changed

3 files changed

+45
-51
lines changed

pkg/driver/failure/knative_delete_control_plane.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,7 @@ kubectl delete pod $(kubectl get pods -n kube-system -o name | cut -c 5- | grep
2727
# kube-scheduler
2828
kubectl delete pod $(kubectl get pods -n kube-system -o name | cut -c 5- | grep kube-scheduler | tail -n 1) -n kube-system &
2929

30+
# istiod
31+
kubectl delete pod $(kubectl get pods -n istio-system -o name | grep istiod | cut -c 5- | tail -n 1) -n istio-system &
32+
3033
# TODO: automatically identify the leader replicas to kill instead of picking an arbitrary pod

pkg/driver/failure/knative_delete_data_plane.sh

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,3 @@ kubectl delete pod $(kubectl get pods -n istio-system -o name | grep cluster-loc
88

99
# istio-ingressgateway
1010
kubectl delete pod $(kubectl get pods -n istio-system -o name | grep istio-ingressgateway | cut -c 5- | tail -n 1) -n istio-system &
11-
12-
# istiod
13-
kubectl delete pod $(kubectl get pods -n istio-system -o name | grep istiod | cut -c 5- | tail -n 1) -n istio-system &

pkg/driver/failure/triggers.go

Lines changed: 42 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,98 +9,92 @@ import (
99
"time"
1010
)
1111

12-
const NodeSeparator = " "
12+
const (
13+
NodeSeparator = " "
14+
15+
ControlPlaneFailure = "control_plane"
16+
DataPlaneFailure = "data_plane"
17+
WorkerNodeFailure = "worker_node"
18+
)
1319

1420
func ScheduleFailure(platform string, config *config.FailureConfiguration) {
1521
if config != nil && config.FailAt != 0 && config.FailComponent != "" {
22+
time.Sleep(time.Duration(config.FailAt) * time.Second)
23+
1624
switch platform {
1725
case "Knative", "Knative-RPS":
18-
triggerKnativeFailure(config.FailNode, config.FailComponent, config.FailAt)
26+
triggerKnativeFailure(config.FailNode, config.FailComponent)
1927
case "Dirigent", "Dirigent-RPS":
20-
triggerDirigentFailure(config.FailNode, config.FailComponent, config.FailAt)
28+
triggerDirigentFailure(config.FailNode, config.FailComponent)
2129
default:
2230
logrus.Errorf("No specified failure handler for given type of system.")
2331
}
2432
}
2533
}
2634

27-
func triggerKnativeFailure(nodes string, component string, t int) {
28-
time.Sleep(time.Duration(t) * time.Second)
35+
func invokeRemotely(command []string, nodes string) {
36+
splitNodes := strings.Split(nodes, NodeSeparator)
37+
wg := &sync.WaitGroup{}
38+
39+
for _, node := range splitNodes {
40+
wg.Add(1)
41+
42+
go func(command []string, node string) {
43+
defer wg.Done()
2944

45+
finalCommand := append([]string{"ssh", "-o", "StrictHostKeyChecking=no", node}, command...)
46+
invokeLocally(finalCommand)
47+
}(command, node)
48+
}
49+
50+
wg.Wait()
51+
}
52+
53+
func triggerKnativeFailure(nodes string, component string) {
3054
var command []string
3155
switch component {
32-
case "control_plane":
56+
case ControlPlaneFailure:
3357
command = []string{"bash", "./pkg/driver/failure/knative_delete_control_plane.sh"}
34-
case "data_plane":
58+
case DataPlaneFailure:
3559
command = []string{"bash", "./pkg/driver/failure/knative_delete_data_plane.sh"}
36-
case "worker_node":
60+
case WorkerNodeFailure:
3761
command = []string{"sudo", "systemctl", "restart", "kubelet"}
3862
default:
3963
logrus.Fatal("Invalid component to fail.")
4064
}
4165

4266
if component != "worker_node" {
43-
invokeCommand(command, t)
67+
invokeLocally(command)
4468
} else {
45-
splitNodes := strings.Split(nodes, NodeSeparator)
46-
wg := &sync.WaitGroup{}
47-
48-
for _, node := range splitNodes {
49-
wg.Add(1)
50-
51-
go func(command []string, node string, t int) {
52-
defer wg.Done()
53-
54-
finalCommand := append([]string{"ssh", "-o", "StrictHostKeyChecking=no", node}, command...)
55-
invokeCommand(finalCommand, t)
56-
}(command, node, t)
57-
}
58-
59-
wg.Wait()
69+
invokeRemotely(command, nodes)
6070
}
6171
}
6272

63-
func triggerDirigentFailure(nodes string, component string, t int) {
64-
time.Sleep(time.Duration(t) * time.Second)
65-
73+
func triggerDirigentFailure(nodes string, component string) {
6674
var command []string
6775
switch component {
68-
case "control_plane":
76+
case ControlPlaneFailure:
6977
command = []string{"sudo", "systemctl", "restart", "control_plane"}
70-
case "data_plane":
78+
case DataPlaneFailure:
7179
command = []string{"sudo", "systemctl", "restart", "data_plane"}
72-
case "worker_node":
80+
case WorkerNodeFailure:
7381
command = []string{"sudo", "systemctl", "restart", "worker_node"}
7482
default:
7583
logrus.Fatal("Invalid component to fail.")
7684
}
7785

7886
if nodes == "" {
79-
invokeCommand(command, t)
87+
invokeLocally(command)
8088
} else {
81-
splitNodes := strings.Split(nodes, " ")
82-
wg := &sync.WaitGroup{}
83-
84-
for _, node := range splitNodes {
85-
wg.Add(1)
86-
87-
go func(command []string, node string, t int) {
88-
defer wg.Done()
89-
90-
finalCommand := append([]string{"ssh", "-o", "StrictHostKeyChecking=no", node}, command...)
91-
invokeCommand(finalCommand, t)
92-
}(command, node, t)
93-
}
94-
95-
wg.Wait()
89+
invokeRemotely(command, nodes)
9690
}
9791
}
9892

99-
func invokeCommand(command []string, t int) {
93+
func invokeLocally(command []string) {
10094
cmd := exec.Command(command[0], command[1:]...)
10195
output, err := cmd.CombinedOutput()
10296
if err != nil {
103-
logrus.Errorf("Error triggering %s failure at t = %d - %v", command, t, err)
97+
logrus.Errorf("Error triggering %s failure - %v", command, err)
10498
return
10599
}
106100

0 commit comments

Comments
 (0)