Skip to content

Commit 9150c74

Browse files
feat: add host preflights for all needed ports (#1205)
* feat: add host preflights for all needed ports checks if all needed ports are available on the server prior to starting the installation. * chore: add e2e test for port failures make sure preflights are failing if some ports are in use. * feat: auto reboot node on reset now we reboot the node after finishing the reset command. * chore(e2e): wait for dbus + remove --reboot from reset * chore: simplify things by just sleeping * chore: log when sleeping * chore(e2e): persist default route
1 parent 2c793d9 commit 9150c74

File tree

7 files changed

+481
-30
lines changed

7 files changed

+481
-30
lines changed

cmd/embedded-cluster/reset.go

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -365,11 +365,6 @@ var resetCommand = &cli.Command{
365365
Usage: "Disable interactive prompts",
366366
Value: false,
367367
},
368-
&cli.BoolFlag{
369-
Name: "reboot",
370-
Usage: "Reboot system after resetting the node",
371-
Value: false,
372-
},
373368
},
374369
Usage: fmt.Sprintf("Remove %s from the current node", binName),
375370
Action: func(c *cli.Context) error {
@@ -378,7 +373,7 @@ var resetCommand = &cli.Command{
378373
}
379374

380375
logrus.Info("This will remove this node from the cluster and completely reset it, removing all data stored on the node.")
381-
logrus.Info("Do not reset another node until this is complete.")
376+
logrus.Info("This action will cause the node to reboot. Do not reset another node until this is complete.")
382377
if !c.Bool("force") && !c.Bool("no-prompt") && !prompts.New().Confirm("Do you want to continue?", false) {
383378
return fmt.Errorf("Aborting")
384379
}
@@ -449,10 +444,6 @@ var resetCommand = &cli.Command{
449444
return err
450445
}
451446

452-
if !c.Bool("reboot") {
453-
logrus.Infof("Node has been reset. Please reboot to ensure transient configuration is also reset.")
454-
}
455-
456447
if err := helpers.RemoveAll(defaults.PathToK0sConfig()); err != nil {
457448
return fmt.Errorf("failed to remove k0s config: %w", err)
458449
}
@@ -501,10 +492,8 @@ var resetCommand = &cli.Command{
501492
return fmt.Errorf("failed to remove k0s binary: %w", err)
502493
}
503494

504-
if c.Bool("reboot") {
505-
if _, err := exec.Command("reboot").Output(); err != nil {
506-
return err
507-
}
495+
if _, err := exec.Command("reboot").Output(); err != nil {
496+
return err
508497
}
509498

510499
return nil

e2e/cluster/cluster.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -359,22 +359,20 @@ func ConfigureProxy(in *Input) {
359359
// them trust it.
360360
for i := 0; i < in.Nodes; i++ {
361361
name := fmt.Sprintf("node-%s-%02d", in.id, i)
362-
for _, cmd := range [][]string{
363-
{"ip", "route", "del", "default"},
364-
{"ip", "route", "add", "default", "via", "10.0.0.254"},
365-
{"mkdir", "-p", "/usr/local/share/ca-certificates/proxy"},
366-
} {
367-
RunCommandOnNode(in, cmd, name)
368-
}
362+
RunCommandOnNode(in, []string{"mkdir", "-p", "/usr/local/share/ca-certificates/proxy"}, name)
369363

370364
CopyFileToNode(in, name, File{
371365
SourcePath: "/tmp/ca.crt",
372366
DestPath: "/usr/local/share/ca-certificates/proxy/ca.crt",
373367
Mode: 0644,
374368
})
375369

376-
cmd := []string{"update-ca-certificates"}
377-
RunCommandOnNode(in, cmd, name)
370+
for _, cmd := range [][]string{
371+
{"update-ca-certificates"},
372+
{"/usr/local/bin/default-route-through-proxy.sh"},
373+
} {
374+
RunCommandOnNode(in, cmd, name)
375+
}
378376
}
379377
}
380378

e2e/install_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,9 @@ func TestResetAndReinstall(t *testing.T) {
624624
t.Fatalf("fail to reset the installation: %v", err)
625625
}
626626

627+
t.Logf("%s: waiting for nodes to reboot", time.Now().Format(time.RFC3339))
628+
time.Sleep(30 * time.Second)
629+
627630
t.Logf("%s: installing embedded-cluster on node 0 after reset", time.Now().Format(time.RFC3339))
628631
line = []string{"single-node-install.sh", "ui"}
629632
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
@@ -698,6 +701,9 @@ func TestResetAndReinstallAirgap(t *testing.T) {
698701
t.Fatalf("fail to reset the installation: %v", err)
699702
}
700703

704+
t.Logf("%s: waiting for nodes to reboot", time.Now().Format(time.RFC3339))
705+
time.Sleep(30 * time.Second)
706+
701707
t.Logf("%s: installing embedded-cluster on node 0", time.Now().Format(time.RFC3339))
702708
line = []string{"single-node-airgap-install.sh"}
703709
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {

e2e/preflights_test.go

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,24 @@ func TestPreflights(t *testing.T) {
2929
})
3030

3131
_, stderr, err := container.Exec(cli,
32-
"apt-get update && apt-get install -y apt-utils kmod",
32+
"apt-get update && apt-get install -y apt-utils kmod netcat-traditional",
3333
)
3434
if err != nil {
3535
t.Fatalf("failed to install deps: err=%v, stderr=%s", err, stderr)
3636
}
3737

38+
if _, stderr, err = container.Exec(cli, "nohup netcat -l -p 10250 &"); err != nil {
39+
t.Fatalf("failed to start netcat: err=%v, stderr=%s", err, stderr)
40+
}
41+
42+
if _, stderr, err = container.Exec(cli, "nohup netcat -l 127.0.0.1 -p 50000 &"); err != nil {
43+
t.Fatalf("failed to start netcat: err=%v, stderr=%s", err, stderr)
44+
}
45+
46+
if _, stderr, err = container.Exec(cli, "nohup netcat -l -u -p 4789 &"); err != nil {
47+
t.Fatalf("failed to start netcat: err=%v, stderr=%s", err, stderr)
48+
}
49+
3850
runCmd := fmt.Sprintf("%s install run-preflights --no-prompt", container.GetECBinaryPath())
3951
if os.Getenv("LICENSE_PATH") != "" {
4052
runCmd = fmt.Sprintf("%s --license %s", runCmd, container.GetLicensePath())
@@ -93,10 +105,13 @@ func TestPreflights(t *testing.T) {
93105
assert: func(t *testing.T, results *preflights.Output) {
94106
expected := map[string]bool{
95107
// TODO: work to remove these
96-
"System Clock": true,
97-
"'devices' Cgroup Controller": true,
98-
"API Access": true,
99-
"Proxy Registry Access": true,
108+
"System Clock": true,
109+
"'devices' Cgroup Controller": true,
110+
"API Access": true,
111+
"Proxy Registry Access": true,
112+
"Kubelet Port Availability": true,
113+
"Calico Communication Port Availability": true,
114+
"Local Artifact Mirror Port Availability": true,
100115
// as long as fio ran successfully, we're good
101116
"Filesystem Write Latency": true,
102117
}
@@ -124,6 +139,26 @@ func TestPreflights(t *testing.T) {
124139
}
125140
},
126141
},
142+
{
143+
name: "Should contain port failures",
144+
assert: func(t *testing.T, results *preflights.Output) {
145+
expected := map[string]bool{
146+
"Kubelet Port Availability": false,
147+
"Calico Communication Port Availability": false,
148+
"Local Artifact Mirror Port Availability": false,
149+
}
150+
for _, res := range results.Fail {
151+
if _, ok := expected[res.Title]; ok {
152+
expected[res.Title] = true
153+
}
154+
}
155+
for title, found := range expected {
156+
if !found {
157+
t.Errorf("expected port failure not found: %q", title)
158+
}
159+
}
160+
},
161+
},
127162
}
128163
for _, tt := range tests {
129164
t.Run(tt.name, func(t *testing.T) {

e2e/restore_test.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ func TestSingleNodeDisasterRecovery(t *testing.T) {
6565
t.Fatalf("fail to reset the installation: %v", err)
6666
}
6767

68+
t.Logf("%s: waiting for nodes to reboot", time.Now().Format(time.RFC3339))
69+
time.Sleep(30 * time.Second)
70+
6871
t.Logf("%s: restoring the installation", time.Now().Format(time.RFC3339))
6972
line = append([]string{"restore-installation.exp"}, testArgs...)
7073
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
@@ -156,6 +159,9 @@ func TestSingleNodeDisasterRecoveryWithProxy(t *testing.T) {
156159
t.Fatalf("fail to reset the installation: %v", err)
157160
}
158161

162+
t.Logf("%s: waiting for nodes to reboot", time.Now().Format(time.RFC3339))
163+
time.Sleep(30 * time.Second)
164+
159165
t.Logf("%s: restoring the installation", time.Now().Format(time.RFC3339))
160166
line = append([]string{"restore-installation.exp"}, testArgs...)
161167
line = append(line, "--http-proxy", cluster.HTTPProxy)
@@ -229,6 +235,9 @@ func TestSingleNodeResumeDisasterRecovery(t *testing.T) {
229235
t.Fatalf("fail to reset the installation: %v", err)
230236
}
231237

238+
t.Logf("%s: waiting for nodes to reboot", time.Now().Format(time.RFC3339))
239+
time.Sleep(30 * time.Second)
240+
232241
t.Logf("%s: restoring the installation", time.Now().Format(time.RFC3339))
233242
line = append([]string{"resume-restore.exp"}, testArgs...)
234243
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
@@ -329,6 +338,10 @@ func TestSingleNodeAirgapDisasterRecovery(t *testing.T) {
329338
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
330339
t.Fatalf("fail to reset the installation: %v", err)
331340
}
341+
342+
t.Logf("%s: waiting for nodes to reboot", time.Now().Format(time.RFC3339))
343+
time.Sleep(30 * time.Second)
344+
332345
installTestDependenciesDebian(t, tc, 0, true)
333346
t.Logf("%s: restoring the installation", time.Now().Format(time.RFC3339))
334347
testArgs = append(testArgs, "--pod-cidr", "10.128.0.0/20", "--service-cidr", "10.129.0.0/20")
@@ -463,7 +476,7 @@ func TestMultiNodeHADisasterRecovery(t *testing.T) {
463476
}
464477

465478
// reset the cluster
466-
line = []string{"reset-installation.sh", "--force", "--reboot"}
479+
line = []string{"reset-installation.sh", "--force"}
467480
t.Logf("%s: resetting the installation on node 2", time.Now().Format(time.RFC3339))
468481
if _, _, err := RunCommandOnNode(t, tc, 2, line); err != nil {
469482
t.Fatalf("fail to reset the installation: %v", err)
@@ -681,7 +694,7 @@ func TestMultiNodeAirgapHADisasterRecovery(t *testing.T) {
681694
}
682695

683696
// reset the cluster
684-
line = []string{"reset-installation.sh", "--force", "--reboot"}
697+
line = []string{"reset-installation.sh", "--force"}
685698
t.Logf("%s: resetting the installation on node 2", time.Now().Format(time.RFC3339))
686699
if _, _, err := RunCommandOnNode(t, tc, 2, line); err != nil {
687700
t.Fatalf("fail to reset the installation: %v", err)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
2+
3+
echo "
4+
[Unit]
5+
Description=Set default route through proxy node
6+
After=network-online.target
7+
Wants=network-online.target
8+
9+
[Service]
10+
Type=oneshot
11+
ExecStartPre=/bin/sleep 5
12+
ExecStart=/bin/bash -c 'ip route del default; ip route add default via 10.0.0.254'
13+
RemainAfterExit=true
14+
15+
[Install]
16+
WantedBy=multi-user.target" > /etc/systemd/system/default-route-through-proxy.service
17+
18+
systemctl daemon-reload
19+
systemctl enable default-route-through-proxy
20+
systemctl start default-route-through-proxy

0 commit comments

Comments
 (0)