Skip to content

Commit ba5dfae

Browse files
feat: adding support for kots application host preflights (#52)
Runs host preflights before starting to install or upgrade the cluster. Host preflights may be present as part of any embedded Helm chart or as part of the embedded KOTS release. This is a sample output:

```
INFO[0001] Running host preflights on nodes
✓ Host Preflight checks completed on host 44.210.139.58
✓ Host Preflight checks completed on host 54.227.214.131
✓ Host Preflight checks completed on host 18.206.223.187
+----------------+--------+---------+---------------------------------------------------+
| ADDRESS        | STATUS | TITLE   | MESSAGE                                           |
+----------------+--------+---------+---------------------------------------------------+
| 54.227.214.131 | PASS   | Port 23 | Port 23 is available                              |
| 54.227.214.131 | PASS   | Port 24 | Port 24 is available                              |
| 18.206.223.187 | PASS   | Port 23 | Port 23 is available                              |
| 18.206.223.187 | PASS   | Port 24 | Port 24 is available                              |
| 44.210.139.58  | PASS   | Port 23 | Port 23 is available                              |
| 44.210.139.58  | PASS   | Port 24 | Port 24 is available                              |
| 54.227.214.131 | WARN   | Port 22 | Another process was already listening on port 22. |
| 18.206.223.187 | WARN   | Port 22 | Another process was already listening on port 22. |
| 44.210.139.58  | WARN   | Port 22 | Another process was already listening on port 22. |
+----------------+--------+---------+---------------------------------------------------+
WARN[0037] preflight had warnings on one or more hosts
? Do you want to continue ? Yes
```

In case of warnings, the user is asked for confirmation before proceeding. In case of failures, the install process is aborted.
1 parent c502b0a commit ba5dfae

File tree

18 files changed

+819
-82
lines changed

18 files changed

+819
-82
lines changed

.github/workflows/e2e.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ jobs:
1919
- TestMultiNodeInteractiveInstallation
2020
- TestInstallWithDisabledAddons
2121
- TestEmbedAddonsOnly
22+
- TestHostPreflight
2223
steps:
2324
- name: Move Docker aside
2425
run: |

Makefile

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ K0SCTL_VERSION = v0.15.5
77
TERRAFORM_VERSION = 1.5.4
88
OPENEBS_VERSION = 3.7.0
99
K0S_VERSION = v1.27.5+k0s.0
10+
PREFLIGHT_VERSION = v0.71.1
1011
LD_FLAGS = -X github.com/replicatedhq/helmvm/pkg/defaults.K0sVersion=$(K0S_VERSION) -X main.Version=$(VERSION)
1112

1213
default: helmvm-linux-amd64
@@ -94,9 +95,18 @@ pkg/goods/bins/helmvm/k0sctl-darwin-arm64:
9495
curl -L -o pkg/goods/bins/helmvm/k0sctl-darwin-arm64 "https://github.com/k0sproject/k0sctl/releases/download/$(K0SCTL_VERSION)/k0sctl-darwin-arm64"
9596
chmod +x pkg/goods/bins/helmvm/k0sctl-darwin-arm64
9697

98+
pkg/goods/bins/helmvm/preflight:
99+
mkdir -p pkg/goods/bins/helmvm
100+
mkdir -p output/tmp/preflight
101+
curl -L -o output/tmp/preflight/preflight.tar.gz https://github.com/replicatedhq/troubleshoot/releases/download/$(PREFLIGHT_VERSION)/preflight_linux_amd64.tar.gz
102+
tar -xzf output/tmp/preflight/preflight.tar.gz -C output/tmp/preflight
103+
mv output/tmp/preflight/preflight pkg/goods/bins/helmvm/preflight
104+
97105
.PHONY: static
98106
static: pkg/addons/adminconsole/charts/adminconsole-$(ADMIN_CONSOLE_CHART_VERSION).tgz \
99-
output/bin/yq pkg/goods/bins/k0sctl/k0s-$(K0S_VERSION) \
107+
output/bin/yq \
108+
pkg/goods/bins/helmvm/preflight \
109+
pkg/goods/bins/k0sctl/k0s-$(K0S_VERSION) \
100110
pkg/goods/images/list.txt
101111

102112
.PHONY: static-darwin-arm64

cmd/helmvm/install.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"github.com/replicatedhq/helmvm/pkg/defaults"
2525
"github.com/replicatedhq/helmvm/pkg/goods"
2626
"github.com/replicatedhq/helmvm/pkg/infra"
27+
"github.com/replicatedhq/helmvm/pkg/preflights"
2728
pb "github.com/replicatedhq/helmvm/pkg/progressbar"
2829
"github.com/replicatedhq/helmvm/pkg/prompts"
2930
)
@@ -53,13 +54,54 @@ func runPostApply(ctx context.Context) error {
5354
return nil
5455
}
5556

57+
// runHostPreflights run the host preflights we found embedded in the binary
58+
// on all configured hosts. We attempt to read HostPreflights from all the
59+
// embedded Helm Charts and from the Kots Application Release files.
60+
func runHostPreflights(c *cli.Context) error {
61+
logrus.Infof("Running host preflights on nodes")
62+
cfg, err := config.ReadConfigFile(defaults.PathToConfig("k0sctl.yaml"))
63+
if err != nil {
64+
return fmt.Errorf("unable to read cluster config: %w", err)
65+
}
66+
hpf, err := addons.NewApplier().HostPreflights()
67+
if err != nil {
68+
return fmt.Errorf("unable to read host preflights: %w", err)
69+
}
70+
if len(hpf.Collectors) == 0 && len(hpf.Analyzers) == 0 {
71+
logrus.Info("No host preflights found")
72+
return nil
73+
}
74+
outputs := preflights.NewOutputs()
75+
for _, host := range cfg.Spec.Hosts {
76+
addr := host.Address()
77+
out, err := preflights.Run(c.Context, host, hpf)
78+
if err != nil {
79+
return fmt.Errorf("preflight failed on %s: %w", addr, err)
80+
}
81+
outputs[addr] = out
82+
}
83+
outputs.PrintTable()
84+
if outputs.HaveFails() {
85+
return fmt.Errorf("preflights haven't passed on one or more hosts")
86+
}
87+
if !outputs.HaveWarns() || c.Bool("no-prompt") {
88+
return nil
89+
}
90+
logrus.Warn("Host preflights have warnings on one or more hosts")
91+
if !prompts.New().Confirm("Do you want to continue ?", false) {
92+
return fmt.Errorf("user aborted")
93+
}
94+
return nil
95+
}
96+
5697
// runPostApplyOnHost runs the post-apply script on a host. XXX I don't think this
5798
// belongs here and needs to be refactored in a more generic way. It's here
5899
// because I have other things to do and this is a prototype.
59100
func runPostApplyOnHost(ctx context.Context, host *cluster.Host) error {
60101
if err := host.Connect(); err != nil {
61102
return fmt.Errorf("failed to connect to host: %w", err)
62103
}
104+
defer host.Disconnect()
63105
src := "/etc/systemd/system/k0scontroller.service"
64106
if host.Role == "worker" {
65107
src = "/etc/systemd/system/k0sworker.service"
@@ -276,6 +318,9 @@ func applyK0sctl(c *cli.Context, useprompt bool, nodes []infra.Node) error {
276318
if err := ensureK0sctlConfig(c, nodes, useprompt); err != nil {
277319
return fmt.Errorf("unable to create config file: %w", err)
278320
}
321+
if err := runHostPreflights(c); err != nil {
322+
return fmt.Errorf("unable to finish preflight checks: %w", err)
323+
}
279324
logrus.Infof("Applying cluster configuration")
280325
if err := runK0sctlApply(c.Context); err != nil {
281326
logrus.Errorf("Installation or upgrade failed.")

cmd/helmvm/join.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ var joinCommand = &cli.Command{
3333
if err := goods.Materialize(); err != nil {
3434
return fmt.Errorf("unable to materialize binaries: %w", err)
3535
}
36+
if err := runHostPreflightsLocally(c); err != nil {
37+
return fmt.Errorf("unable to run host preflights locally: %w", err)
38+
}
3639
logrus.Infof("Saving token to disk")
3740
if err := saveTokenToDisk(c.Args().First()); err != nil {
3841
return fmt.Errorf("unable to save token to disk: %w", err)

cmd/helmvm/upgrade.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313
"github.com/replicatedhq/helmvm/pkg/addons"
1414
"github.com/replicatedhq/helmvm/pkg/defaults"
1515
"github.com/replicatedhq/helmvm/pkg/goods"
16+
"github.com/replicatedhq/helmvm/pkg/preflights"
17+
"github.com/replicatedhq/helmvm/pkg/prompts"
1618
)
1719

1820
func stopHelmVM() error {
@@ -56,6 +58,36 @@ func canRunUpgrade(c *cli.Context) error {
5658
return fmt.Errorf("command not available")
5759
}
5860

61+
// runHostPreflightsLocally runs the embedded host preflights in the local node prior to
62+
// node upgrade.
63+
func runHostPreflightsLocally(c *cli.Context) error {
64+
logrus.Infof("Running host preflights locally")
65+
hpf, err := addons.NewApplier().HostPreflights()
66+
if err != nil {
67+
return fmt.Errorf("unable to read host preflights: %w", err)
68+
}
69+
if len(hpf.Collectors) == 0 && len(hpf.Analyzers) == 0 {
70+
logrus.Info("No host preflights found")
71+
return nil
72+
}
73+
out, err := preflights.RunLocal(c.Context, hpf)
74+
if err != nil {
75+
return fmt.Errorf("preflight failed: %w", err)
76+
}
77+
out.PrintTable()
78+
if out.HasFail() {
79+
return fmt.Errorf("preflights haven't passed on one or more hosts")
80+
}
81+
if !out.HasWarn() || c.Bool("no-prompt") {
82+
return nil
83+
}
84+
logrus.Warn("Host preflights have warnings on one or more hosts")
85+
if !prompts.New().Confirm("Do you want to continue ?", false) {
86+
return fmt.Errorf("user aborted")
87+
}
88+
return nil
89+
}
90+
5991
var upgradeCommand = &cli.Command{
6092
Name: "upgrade",
6193
Usage: "Upgrade the local node",
@@ -78,6 +110,9 @@ var upgradeCommand = &cli.Command{
78110
if err := goods.Materialize(); err != nil {
79111
return fmt.Errorf("unable to materialize binaries: %w", err)
80112
}
113+
if err := runHostPreflightsLocally(c); err != nil {
114+
return fmt.Errorf("unable to run host preflights locally: %w", err)
115+
}
81116
logrus.Infof("Stopping %s", defaults.BinaryName())
82117
if err := stopHelmVM(); err != nil {
83118
return fmt.Errorf("unable to stop: %w", err)

e2e/install_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,3 +252,30 @@ func TestInstallWithDisabledAddons(t *testing.T) {
252252
t.Fatalf("fail to install embedded ssh in node 0: %v", err)
253253
}
254254
}
255+
256+
func TestHostPreflight(t *testing.T) {
257+
t.Parallel()
258+
tc := cluster.NewTestCluster(&cluster.Input{
259+
T: t,
260+
Nodes: 1,
261+
Image: "centos/8-Stream",
262+
SSHPublicKey: "../output/tmp/id_rsa.pub",
263+
SSHPrivateKey: "../output/tmp/id_rsa",
264+
HelmVMPath: "../output/bin/helmvm",
265+
})
266+
defer tc.Destroy()
267+
t.Log("installing ssh and binutils on node 0")
268+
commands := [][]string{
269+
{"dnf", "install", "-y", "openssh-server", "binutils", "tar"},
270+
{"systemctl", "enable", "sshd"},
271+
{"systemctl", "start", "sshd"},
272+
}
273+
if err := RunCommandsOnNode(t, tc, 0, commands); err != nil {
274+
t.Fatalf("fail to install ssh on node %s: %v", tc.Nodes[0], err)
275+
}
276+
t.Log("installing helmvm on node 0")
277+
line := []string{"embedded-preflight.sh"}
278+
if _, _, err := RunCommandOnNode(t, tc, 0, line); err != nil {
279+
t.Fatalf("fail to install helmvm on node %s: %v", tc.Nodes[0], err)
280+
}
281+
}

e2e/scripts/embedded-preflight.sh

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
preflight_with_failure="
5+
apiVersion: troubleshoot.sh/v1beta2
6+
kind: HostPreflight
7+
spec:
8+
collectors:
9+
- tcpPortStatus:
10+
collectorName: Port 24
11+
port: 24
12+
- tcpPortStatus:
13+
collectorName: Port 22
14+
port: 22
15+
analyzers:
16+
- tcpPortStatus:
17+
checkName: Port 24
18+
collectorName: Port 24
19+
outcomes:
20+
- fail:
21+
when: connection-refused
22+
message: Connection to port 24 was refused.
23+
- warn:
24+
when: address-in-use
25+
message: Another process was already listening on port 24.
26+
- fail:
27+
when: connection-timeout
28+
message: Timed out connecting to port 24.
29+
- fail:
30+
when: error
31+
message: Unexpected port status
32+
- pass:
33+
when: connected
34+
message: Port 24 is available
35+
- warn:
36+
message: Unexpected port status
37+
- tcpPortStatus:
38+
checkName: Port 22
39+
collectorName: Port 22
40+
outcomes:
41+
- fail:
42+
when: connection-refused
43+
message: Connection to port 22 was refused.
44+
- fail:
45+
when: address-in-use
46+
message: Another process was already listening on port 22.
47+
- fail:
48+
when: connection-timeout
49+
message: Timed out connecting to port 22.
50+
- fail:
51+
when: error
52+
message: Unexpected port status
53+
- pass:
54+
when: connected
55+
message: Port 22 is available
56+
- warn:
57+
message: Unexpected port status
58+
"
59+
60+
preflight_with_warning="
61+
apiVersion: troubleshoot.sh/v1beta2
62+
kind: HostPreflight
63+
spec:
64+
collectors:
65+
- tcpPortStatus:
66+
collectorName: Port 24
67+
port: 24
68+
- tcpPortStatus:
69+
collectorName: Port 22
70+
port: 22
71+
analyzers:
72+
- tcpPortStatus:
73+
checkName: Port 24
74+
collectorName: Port 24
75+
outcomes:
76+
- fail:
77+
when: connection-refused
78+
message: Connection to port 24 was refused.
79+
- warn:
80+
when: address-in-use
81+
message: Another process was already listening on port 24.
82+
- fail:
83+
when: connection-timeout
84+
message: Timed out connecting to port 24.
85+
- fail:
86+
when: error
87+
message: Unexpected port status
88+
- pass:
89+
when: connected
90+
message: Port 24 is available
91+
- warn:
92+
message: Unexpected port status
93+
- tcpPortStatus:
94+
checkName: Port 22
95+
collectorName: Port 22
96+
outcomes:
97+
- fail:
98+
when: connection-refused
99+
message: Connection to port 22 was refused.
100+
- warn:
101+
when: address-in-use
102+
message: Another process was already listening on port 22.
103+
- fail:
104+
when: connection-timeout
105+
message: Timed out connecting to port 22.
106+
- fail:
107+
when: error
108+
message: Unexpected port status
109+
- pass:
110+
when: connected
111+
message: Port 22 is available
112+
- warn:
113+
message: Unexpected port status
114+
"
115+
116+
# embed_preflight SPEC
#
# Writes SPEC to /root/preflight.yaml, archives it as /root/preflight.tar.gz,
# converts the archive into /root/preflight.o with its .data section renamed
# to sec_bundle, and finally adds that file as the sec_bundle section of a
# fresh copy of the helmvm binary restored from /usr/local/bin/helmvm-copy.
embed_preflight() {
    content="$1"
    local bundle=/root/preflight
    local binary=/usr/local/bin/helmvm

    # Drop whatever a previous call left behind before writing the new spec.
    rm -rf "$bundle"*
    printf '%s\n' "$content" > "$bundle.yaml"
    tar -czvf "$bundle.tar.gz" "$bundle.yaml"
    objcopy --input-target binary --output-target binary \
        --rename-section .data=sec_bundle "$bundle.tar.gz" "$bundle.o"

    # Always start from the untouched copy of the binary so that only one
    # bundle section is ever added to it.
    rm -rf "$binary"
    cp -Rfp "$binary-copy" "$binary"
    objcopy --add-section sec_bundle="$bundle.o" "$binary"
}
126+
127+
# has_applied_host_preflight
#
# Succeeds only when /tmp/log contains both messages the embedded host
# preflight specs are expected to produce: the pass message for port 24 and
# the "already listening" message for port 22. Returns 1 otherwise.
has_applied_host_preflight() {
    grep -q "Port 24 is available" /tmp/log || return 1
    grep -q "Another process was already listening on port 22" /tmp/log || return 1
}
135+
136+
# wait_for_healthy_node
#
# Polls `kubectl get nodes` every 5 seconds until at least one line reports
# Ready (lines containing NotReady are ignored). Returns 0 once that happens
# and 1 when the retry counter has exceeded 36 without a Ready node.
wait_for_healthy_node() {
    counter=0
    ready=$(kubectl get nodes | grep -v NotReady | grep -c Ready || true)
    while [ "$ready" -lt "1" ]; do
        # Give up once the retry budget has been used.
        [ "$counter" -le 36 ] || return 1
        sleep 5
        counter=$((counter + 1))
        echo "Waiting for node to be ready"
        ready=$(kubectl get nodes | grep -v NotReady | grep -c Ready || true)
        # Show the current node list in the output; errors here are ignored.
        kubectl get nodes || true
    done
    return 0
}
151+
152+
# main
#
# Drives the host preflight scenario in two passes:
#   1. Embed the spec held in preflight_with_failure and run the install,
#      which is expected to fail. The log must still show the preflight
#      messages.
#   2. Embed the spec held in preflight_with_warning and run the install with
#      --no-prompt, which is expected to succeed, show the preflight messages
#      and end up with a Ready node.
# Exits with status 1 as soon as any expectation is not met.
main() {
    # Keep an untouched copy of the binary; embed_preflight restores from it.
    cp -Rfp /usr/local/bin/helmvm /usr/local/bin/helmvm-copy

    # Pass 1: the install must fail.
    embed_preflight "$preflight_with_failure"
    if helmvm install --no-prompt 2>&1 | tee /tmp/log; then
        cat /tmp/log
        echo "Expected installation to fail"
        exit 1
    fi
    has_applied_host_preflight || {
        echo "Install hasn't applied host preflight"
        cat /tmp/log
        exit 1
    }
    # Keep the log of the failed attempt around before it gets overwritten.
    mv /tmp/log /tmp/log-failure

    # Pass 2: the install must succeed despite the warning.
    embed_preflight "$preflight_with_warning"
    helmvm install --no-prompt 2>&1 | tee /tmp/log || {
        cat /etc/os-release
        echo "Failed to install helmvm"
        exit 1
    }
    grep -q "You can now access your cluster" /tmp/log || {
        echo "Failed to install helmvm"
        exit 1
    }
    has_applied_host_preflight || {
        echo "Install hasn't applied host preflight"
        cat /tmp/log
        exit 1
    }
    wait_for_healthy_node || {
        echo "Failed to install helmvm"
        exit 1
    }
}
186+
187+
export KUBECONFIG=/root/.helmvm/etc/kubeconfig
188+
export PATH=$PATH:/root/.helmvm/bin
189+
main

0 commit comments

Comments
 (0)