Skip to content

Commit 06b0f4b

Browse files
authored
Add preflight check for directory permissions to prevent etcd startup failures (#1901)
* Ensure data dir and its parent dirs have execute permissions
1 parent a49d57f commit 06b0f4b

File tree

7 files changed

+83
-13
lines changed

7 files changed

+83
-13
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ K0S_GO_VERSION = v1.30.9+k0s.0
1515
PREVIOUS_K0S_VERSION ?= v1.29.9+k0s.0-ec.0
1616
PREVIOUS_K0S_GO_VERSION ?= v1.29.9+k0s.0
1717
K0S_BINARY_SOURCE_OVERRIDE =
18-
TROUBLESHOOT_VERSION = v0.116.4
18+
TROUBLESHOOT_VERSION = v0.117.0
1919

2020
KOTS_VERSION = v$(shell awk '/^version/{print $$2}' pkg/addons/adminconsole/static/metadata.yaml | sed -E 's/([0-9]+\.[0-9]+\.[0-9]+).*/\1/')
2121
# When updating KOTS_BINARY_URL_OVERRIDE, also update the KOTS_VERSION above or

cmd/installer/cli/install.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ func preRunInstall(cmd *cobra.Command, flags *InstallCmdFlags) error {
232232
return fmt.Errorf("unable to write runtime config to disk: %w", err)
233233
}
234234

235+
if err := os.Chmod(runtimeconfig.EmbeddedClusterHomeDirectory(), 0755); err != nil {
236+
// don't fail, as there are cases where we can't change the permissions (bind mounts, SELinux, etc.),
237+
// and we handle and surface those errors to the user later (host preflights, checking exec errors, etc.)
238+
logrus.Debugf("unable to chmod embedded-cluster home dir: %s", err)
239+
}
240+
235241
return nil
236242
}
237243

cmd/installer/cli/join.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,12 @@ func runJoinVerifyAndPrompt(name string, flags JoinCmdFlags, jcmd *kotsadm.JoinC
247247
return fmt.Errorf("unable to write runtime config: %w", err)
248248
}
249249

250+
if err := os.Chmod(runtimeconfig.EmbeddedClusterHomeDirectory(), 0755); err != nil {
251+
// don't fail, as there are cases where we can't change the permissions (bind mounts, SELinux, etc.),
252+
// and we handle and surface those errors to the user later (host preflights, checking exec errors, etc.)
253+
logrus.Debugf("unable to chmod embedded-cluster home dir: %s", err)
254+
}
255+
250256
// check to make sure the version returned by the join token is the same as the one we are running
251257
if strings.TrimPrefix(jcmd.EmbeddedClusterVersion, "v") != strings.TrimPrefix(versions.Version, "v") {
252258
return fmt.Errorf("embedded cluster version mismatch - this binary is version %q, but the cluster is running version %q", versions.Version, jcmd.EmbeddedClusterVersion)

e2e/preflights_test.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,17 @@ func TestPreflights(t *testing.T) {
2020
})
2121
defer tc.Cleanup()
2222

23-
_, stderr, err := tc.RunCommandOnNode(0, []string{"apt-get update && apt-get install -y apt-utils netcat-traditional"})
23+
// set up incorrect permissions on data dir and parent dir
24+
_, stderr, err := tc.RunCommandOnNode(0, []string{
25+
"mkdir -p /var/lib/embedded-cluster && " +
26+
"chmod 744 /var/lib/embedded-cluster && " + // remove execute from data dir
27+
"chmod 744 /var/lib", // remove execute from parent dir
28+
})
29+
if err != nil {
30+
t.Fatalf("failed to adjust dir permissions: err=%v, stderr=%s", err, stderr)
31+
}
32+
33+
_, stderr, err = tc.RunCommandOnNode(0, []string{"apt-get update && apt-get install -y apt-utils netcat-traditional"})
2434
if err != nil {
2535
t.Fatalf("failed to install deps: err=%v, stderr=%s", err, stderr)
2636
}
@@ -95,6 +105,7 @@ func TestPreflights(t *testing.T) {
95105
"Kubelet Port Availability": true,
96106
"Calico Communication Port Availability": true,
97107
"Local Artifact Mirror Port Availability": true,
108+
"Data Directory Permissions": true,
98109
// as long as fio ran successfully, we're good
99110
"Filesystem Write Latency": true,
100111
}
@@ -142,6 +153,33 @@ func TestPreflights(t *testing.T) {
142153
}
143154
},
144155
},
156+
{
157+
name: "Should contain data directory permissions failures",
158+
assert: func(t *testing.T, results *types.Output) {
159+
for _, res := range results.Fail {
160+
if res.Title == "Data Directory Permissions" {
161+
// should not contain data dir as we automatically fix it
162+
if strings.Contains(res.Message, "/var/lib/embedded-cluster") {
163+
t.Errorf("failure message should not contain /var/lib/embedded-cluster directory: %s", res.Message)
164+
}
165+
// should contain parent dir as we don't automatically fix it
166+
if !strings.Contains(res.Message, "/var/lib.") {
167+
t.Errorf("failure message should contain /var/lib directory: %s", res.Message)
168+
}
169+
t.Logf("directory permissions check failed as expected: %s", res.Message)
170+
return
171+
}
172+
}
173+
// If we get here, check if it incorrectly passed
174+
for _, res := range results.Pass {
175+
if res.Title == "Data Directory Permissions" {
176+
t.Errorf("directory permissions check passed unexpectedly: %s", res.Message)
177+
return
178+
}
179+
}
180+
t.Errorf("directory permissions check not found in results")
181+
},
182+
},
145183
}
146184
for _, tt := range tests {
147185
t.Run(tt.name, func(t *testing.T) {

go.mod

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ require (
3232
github.com/replicatedhq/embedded-cluster/kinds v0.0.0
3333
github.com/replicatedhq/embedded-cluster/utils v0.0.0
3434
github.com/replicatedhq/kotskinds v0.0.0-20240814191029-3f677ee409a0
35-
github.com/replicatedhq/troubleshoot v0.116.4
35+
github.com/replicatedhq/troubleshoot v0.117.0
3636
github.com/sirupsen/logrus v1.9.3
3737
github.com/spf13/cobra v1.9.1
3838
github.com/spf13/viper v1.19.0
@@ -134,6 +134,7 @@ require (
134134
github.com/docker/go-metrics v0.0.1 // indirect
135135
github.com/docker/go-units v0.5.0 // indirect
136136
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
137+
github.com/ebitengine/purego v0.8.2 // indirect
137138
github.com/envoyproxy/go-control-plane v0.13.1 // indirect
138139
github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
139140
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
@@ -231,8 +232,7 @@ require (
231232
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
232233
github.com/segmentio/ksuid v1.0.4 // indirect
233234
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
234-
github.com/shirou/gopsutil/v3 v3.24.5 // indirect
235-
github.com/shoenig/go-m1cpu v0.1.6 // indirect
235+
github.com/shirou/gopsutil/v4 v4.25.1 // indirect
236236
github.com/shopspring/decimal v1.4.0 // indirect
237237
github.com/sourcegraph/conc v0.3.0 // indirect
238238
github.com/spf13/afero v1.11.0 // indirect

go.sum

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L
867867
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
868868
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
869869
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
870+
github.com/ebitengine/purego v0.8.2 h1:jPPGWs2sZ1UgOSgD2bClL0MJIqu58nOmIcBuXr62z1I=
871+
github.com/ebitengine/purego v0.8.2/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
870872
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
871873
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
872874
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -1439,8 +1441,8 @@ github.com/redis/go-redis/v9 v9.5.2/go.mod h1:hdY0cQFCN4fnSYT6TkisLufl/4W5UIXyv0
14391441
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
14401442
github.com/replicatedhq/kotskinds v0.0.0-20240814191029-3f677ee409a0 h1:Gi+Fs6583v7GmgQKJyaZuBzcih0z5YXBREDQ8AWY2JM=
14411443
github.com/replicatedhq/kotskinds v0.0.0-20240814191029-3f677ee409a0/go.mod h1:QjhIUu3+OmHZ09u09j3FCoTt8F3BYtQglS+OLmftu9I=
1442-
github.com/replicatedhq/troubleshoot v0.116.4 h1:SDa+bWiXArt4Ypkw3+qjMxl+QUWKZsR0t19A13Mx3G0=
1443-
github.com/replicatedhq/troubleshoot v0.116.4/go.mod h1:OQwNwp78Xkfa/VwzNnDyiTFAAsZK1u3wApYncskHVl0=
1444+
github.com/replicatedhq/troubleshoot v0.117.0 h1:FCw8VodGF/tetL7ZvdOhnjFDOvSDqMq/kce9/dsfHfc=
1445+
github.com/replicatedhq/troubleshoot v0.117.0/go.mod h1:Xt6P84cvEyfyp9J/7EblCqINXHeTc+1zqfJY/KqjOss=
14441446
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
14451447
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
14461448
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
@@ -1468,12 +1470,8 @@ github.com/segmentio/ksuid v1.0.4 h1:sBo2BdShXjmcugAMwjugoGUdUV0pcxY5mW4xKRn3v4c
14681470
github.com/segmentio/ksuid v1.0.4/go.mod h1:/XUiZBD3kVx5SmUOl55voK5yeAbBNNIed+2O73XgrPE=
14691471
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=
14701472
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
1471-
github.com/shirou/gopsutil/v3 v3.24.5 h1:i0t8kL+kQTvpAYToeuiVk3TgDeKOFioZO3Ztz/iZ9pI=
1472-
github.com/shirou/gopsutil/v3 v3.24.5/go.mod h1:bsoOS1aStSs9ErQ1WWfxllSeS1K5D+U30r2NfcubMVk=
1473-
github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
1474-
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
1475-
github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
1476-
github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
1473+
github.com/shirou/gopsutil/v4 v4.25.1 h1:QSWkTc+fu9LTAWfkZwZ6j8MSUk4A2LV7rbH0ZqmLjXs=
1474+
github.com/shirou/gopsutil/v4 v4.25.1/go.mod h1:RoUCUpndaJFtT+2zsZzzmhvbfGoDCJ7nFXKJf8GqJbI=
14771475
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
14781476
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
14791477
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=

pkg/preflights/host-preflight.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,17 @@ spec:
164164
collectorName: 'selinux-mode'
165165
command: 'sh'
166166
args: ['-c', 'getenforce || echo "Missing"']
167+
- run:
168+
# check execute permissions on the data directory, its parents, and root.
169+
# this is necessary for executing binaries in the data directory by other users (e.g. etcd).
170+
collectorName: 'check-data-dir-permissions'
171+
command: 'sh'
172+
args:
173+
- -c
174+
- |
175+
dir="{{ .DataDir }}"
176+
while [ "$dir" != "/" ]; do find "$dir" -maxdepth 0 ! -perm -111; dir=$(dirname "$dir"); done
177+
find "/" -maxdepth 0 ! -perm -111
167178
analyzers:
168179
- cpu:
169180
checkName: CPU
@@ -1119,3 +1130,14 @@ spec:
11191130
- pass:
11201131
when: "Mode == Missing"
11211132
message: SELinux is not installed.
1133+
- textAnalyze:
1134+
checkName: Data Directory Permissions
1135+
fileName: host-collectors/run-host/check-data-dir-permissions.txt
1136+
regexGroups: '(?ms)(?P<Dirs>.*)'
1137+
outcomes:
1138+
- pass:
1139+
when: "Dirs == ''"
1140+
message: "The data directory ({{ .DataDir }}) and all its parent directories have execute permissions"
1141+
- fail:
1142+
message: >-
1143+
The following directories lack execute permissions: {{ `{{ .Dirs | trim | splitList "\n" | join ", " }}` }}.

0 commit comments

Comments
 (0)