Skip to content

Commit 7024616

Browse files
authored
add more collectors and analyzers (#1398)
1 parent f9eae1d commit 7024616

File tree

1 file changed

+195
-15
lines changed

1 file changed

+195
-15
lines changed

pkg/goods/support/host-support-bundle.tmpl.yaml

Lines changed: 195 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,19 @@ spec:
7373
collectorName: top
7474
command: top
7575
args: ['-b', '-n', '1']
76+
- run:
77+
collectorName: uname
78+
command: uname
79+
args: ['-a']
80+
- run:
81+
collectorName: "hostnames"
82+
command: "sh"
83+
args:
84+
- -c
85+
- |
86+
echo "hostname = $(hostname)"
87+
echo "/proc/sys/kernel/hostname = $(cat /proc/sys/kernel/hostname)"
88+
echo "uname -n = $(uname -n)"
7689
- run:
7790
collectorName: df
7891
command: df
@@ -88,6 +101,82 @@ spec:
88101
- run:
89102
collectorName: uptime
90103
command: uptime
104+
- run:
105+
collectorName: sestatus
106+
command: sestatus
107+
args: []
108+
- run:
109+
collectorName: apparmor-status
110+
command: apparmor_status
111+
args: []
112+
- run:
113+
collectorName: iptables
114+
command: iptables
115+
args: ["-L", "-v"]
116+
- run:
117+
collectorName: iptables-version
118+
command: iptables
119+
args: ["--version"]
120+
- run:
121+
collectorName: nftables-list
122+
command: nft
123+
args: ["list", "table", "filter"]
124+
- run:
125+
collectorName: "ipvsadm"
126+
command: "ipvsadm"
127+
args: ["-l", "-n"]
128+
- run:
129+
collectorName: "lsblk"
130+
command: "lsblk"
131+
args: ["--fs"]
132+
- run:
133+
collectorName: lvm-pvdisplay  # distinct name per LVM command so the three results do not share one output name
134+
command: pvdisplay
135+
args: []
136+
- run:
137+
collectorName: lvm-vgdisplay
138+
command: vgdisplay
139+
args: []
140+
- run:
141+
collectorName: lvm-lvdisplay
142+
command: lvdisplay
143+
args: []
144+
- run:
145+
collectorName: "netstat-ports"
146+
command: "netstat"
147+
args: ["-t", "-u", "-l", "-p", "-n"]
148+
- run:
149+
collectorName: "netstat-route-table"
150+
command: "netstat"
151+
args: ["-r", "-n"]
152+
- run:
153+
collectorName: "resolvectl-status"
154+
command: "resolvectl"
155+
args: ["status"]
156+
- run:
157+
collectorName: "resolv-conf"
158+
command: "cat"
159+
args: ["/etc/resolv.conf"]
160+
- run:
161+
collectorName: "systemd-resolved-conf"
162+
command: "cat"
163+
args: ["/etc/systemd/resolved.conf"]
164+
- run:
165+
collectorName: "nsswitch-conf"
166+
command: "cat"
167+
args: ["/etc/nsswitch.conf"]
168+
- run:
169+
collectorName: "hosts"
170+
command: "cat"
171+
args: ["/etc/hosts"]
172+
- run:
173+
collectorName: "ip-route-table"
174+
command: "ip"
175+
args: ["route"]
176+
- run:
177+
collectorName: "sysctl"
178+
command: "sysctl"
179+
args: ["-a"]
91180
- run:
92181
collectorName: k0s-version
93182
command: /usr/local/bin/k0s
@@ -116,6 +205,41 @@ spec:
116205
- copy:
117206
collectorName: runtime-config
118207
path: /etc/embedded-cluster/*
208+
- run:
209+
collectorName: "systemctl-firewalld-status"
210+
command: "systemctl"
211+
args: ["status", "firewalld"]
212+
- run:
213+
collectorName: "systemctl-resolved-status"
214+
command: "systemctl"
215+
args: ["status", "systemd-resolved"]
216+
# Systemd unit configurations for journald, resolved, and the k0s controller/worker services
217+
- run:
218+
collectorName: "systemctl-cat-journald"
219+
command: "systemctl"
220+
args: ["cat", "systemd-journald"]
221+
- run:
222+
collectorName: "systemctl-cat-resolved"
223+
command: "systemctl"
224+
args: ["cat", "systemd-resolved"]
225+
- run:
226+
collectorName: "systemctl-cat-k0scontroller"
227+
command: "systemctl"
228+
args: ["cat", "k0scontroller.service"]
229+
- run:
230+
collectorName: "systemctl-cat-k0sworker"
231+
command: "systemctl"
232+
args: ["cat", "k0sworker.service"]
233+
- run:
234+
collectorName: "journalctl-dmesg"
235+
command: "journalctl"
236+
args: ["--dmesg", "--no-pager", "-S", "7 days ago"]
237+
- copy:
238+
collectorName: "syslog"
239+
path: /var/log/syslog
240+
- copy:
241+
collectorName: "syslog" # Copy the previous syslog file as well in case the current one is rotated
242+
path: /var/log/syslog.1
119243
- run:
120244
collectorName: network-manager-logs
121245
command: journalctl
@@ -156,9 +280,33 @@ spec:
156280
collectorName: 'check-umount'
157281
command: 'sh'
158282
args: ['-c', 'command -v umount']
283+
- run:
284+
collectorName: "mount"
285+
command: "mount"
286+
args: ["-l"]
159287
- copy:
160288
collectorName: installer/lam-service-config
161289
path: /etc/systemd/system/local-artifact-mirror.service.d/*
290+
- run:
291+
collectorName: "ps-high-load"  # counts processes whose state column starts with R or D, grouped by (state, user, cmd); top 20
292+
command: "sh"
293+
args: ["-c", "ps -eo s,user,cmd | grep '^[RD]' | sort | uniq -c | sort -nbr | head -20"]
294+
- run:
295+
collectorName: "ps-detect-antivirus-and-security-tools"
296+
command: "sh"
297+
args: [-c, "ps -ef | grep -E 'clamav|sophos|esets_daemon|fsav|symantec|mfend|ds_agent|kav|bdagent|s1agent|falcon|illumio|xagt' | grep -v grep"]
298+
- filesystemPerformance:
299+
collectorName: filesystem-write-latency-etcd
300+
timeout: 5m
301+
directory: {{ .K0sDataDir }}/etcd
302+
fileSize: 22Mi
303+
operationSize: 2300
304+
datasync: true
305+
runTime: "0" # let it run to completion
306+
- run:
307+
collectorName: "localhost-ips"
308+
command: "sh"
309+
args: ["-c", "host localhost"]
162310
hostAnalyzers:
163311
- ipv4Interfaces:
164312
outcomes:
@@ -173,9 +321,9 @@ spec:
173321
outcomes:
174322
- fail:
175323
when: "< 2G"
176-
message: At least 2G of memory is recommended
324+
message: At least 2GB of memory is required, but less is present
177325
- pass:
178-
message: The system has at least 2G of memory
326+
message: At least 2GB of memory is present
179327
- diskUsage:
180328
checkName: Root disk usage
181329
collectorName: root-disk-usage
@@ -243,21 +391,21 @@ spec:
243391
outcomes:
244392
- fail:
245393
when: "false"
246-
message: Kubernetes API probing is reporting a failure
394+
message: Kubernetes API probing reported a failure
247395
- pass:
248396
when: "true"
249-
message: Kubernetes API probing is reporting success
397+
message: Kubernetes API probing reported success
250398
- textAnalyze:
251399
checkName: NetworkManager managing calico interfaces
252400
fileName: host-collectors/run-host/network-manager-logs.txt
253401
regex: 'device .*cali.+: state change: config'
254402
outcomes:
255403
- fail:
256404
when: "true"
257-
message: NetworkManager seems to be managing calico interfaces
405+
message: NetworkManager is managing Calico interfaces
258406
- pass:
259407
when: "false"
260-
message: NetworkManager isn't managing calico interfaces
408+
message: NetworkManager isn't managing Calico interfaces
261409
- hostServices:
262410
checkName: "Local Artifact Mirror"
263411
outcomes:
@@ -272,13 +420,13 @@ spec:
272420
outcomes:
273421
- fail:
274422
when: 'ntp == unsynchronized+inactive'
275-
message: 'System clock is not synchronized'
423+
message: NTP is inactive and the system clock is not synchronized. Enable NTP and synchronize the system clock to continue.
276424
- fail:
277425
when: 'ntp == unsynchronized+active'
278-
message: System clock is not yet synchronized
426+
message: NTP is enabled but the system clock is not synchronized. Synchronize the system clock to continue.
279427
- pass:
280428
when: 'ntp == synchronized+active'
281-
message: 'System clock is synchronized'
429+
message: NTP is enabled and the system clock is synchronized
282430
- fail:
283431
message: 'Unable to determine system clock status'
284432
- jsonCompare:
@@ -395,7 +543,7 @@ spec:
395543
message: "/proc filesystem is mounted"
396544
- fail:
397545
when: "false"
398-
message: "/proc filesystem is not mounted"
546+
message: /proc filesystem must be mounted, but it currently is not
399547
- textAnalyze:
400548
checkName: Check if 'modprobe' command exists in PATH
401549
fileName: host-collectors/run-host/check-modprobe.txt
@@ -406,7 +554,7 @@ spec:
406554
message: "'modprobe' command exists in PATH"
407555
- fail:
408556
when: "false"
409-
message: "'modprobe' command does not exist in PATH"
557+
message: "'modprobe' command must exist in PATH"
410558
- textAnalyze:
411559
checkName: Check if 'mount' command exists in PATH
412560
fileName: host-collectors/run-host/check-mount.txt
@@ -417,7 +565,7 @@ spec:
417565
message: "'mount' command exists in PATH"
418566
- fail:
419567
when: "false"
420-
message: "'mount' command does not exist in PATH"
568+
message: "'mount' command must exist in PATH"
421569
- textAnalyze:
422570
checkName: Check if 'umount' command exists in PATH
423571
fileName: host-collectors/run-host/check-umount.txt
@@ -428,15 +576,15 @@ spec:
428576
message: "'umount' command exists in PATH"
429577
- fail:
430578
when: "false"
431-
message: "'umount' command does not exist in PATH"
579+
message: "'umount' command must exist in PATH"
432580
- hostOS:
433581
checkName: Check minimum kernel version
434582
outcomes:
435583
- pass:
436584
when: "kernelVersion >= 3.10"
437-
message: "Minimum kernel version of 3.10 has been met"
585+
message: Kernel version is at least 3.10
438586
- fail:
439-
message: "Minimum kernel version of 3.10 has not been met"
587+
message: Kernel version must be at least 3.10
440588
- textAnalyze:
441589
checkName: Hostname Mismatch
442590
fileName: host-collectors/run-host/k0scontroller-logs.txt
@@ -448,3 +596,35 @@ spec:
448596
- pass:
449597
when: "false"
450598
message: "No signs of hostname changes found"
599+
- textAnalyze:
600+
checkName: Check if localhost resolves to 127.0.0.1
601+
fileName: host-collectors/run-host/localhost-ips.txt
602+
regex: 'localhost has address 127.0.0.1'
603+
outcomes:
604+
- fail:
605+
when: "false"
606+
message: "'localhost' does not resolve to 127.0.0.1. Ensure your /etc/hosts file contains an entry for 'localhost' with a loopback address of 127.0.0.1."
607+
- pass:
608+
when: "true"
609+
message: "'localhost' resolves to 127.0.0.1"
610+
- textAnalyze:
611+
checkName: "Detect Threat Management and Network Security Tools"
612+
fileName: host-collectors/run-host/ps-detect-antivirus-and-security-tools.txt
613+
regex: '\b(clamav|sophos|esets_daemon|fsav|symantec|mfend|ds_agent|kav|bdagent|s1agent|falcon|illumio|xagt)\b'
614+
ignoreIfNoFiles: true
615+
outcomes:
616+
- fail:
617+
when: "true"
618+
message: "Antivirus or network security tools detected. These tools are known to interfere with Kubernetes operation in various ways. If problems persist, disable these tools, or consult with your organization's system administrator to ensure that exceptions are made for Kubernetes operation."
619+
- pass:
620+
when: "false"
621+
message: "No antivirus or network security tools detected."
622+
- filesystemPerformance:
623+
checkName: Filesystem Write Latency
624+
collectorName: filesystem-write-latency-etcd
625+
outcomes:
626+
- pass:
627+
when: "p99 < 10ms"
628+
message: 'P99 write latency for the disk at {{ .K0sDataDir }}/etcd is {{ "{{" }} .P99 {{ "}}" }}, which is better than the 10 ms requirement.'
629+
- fail:
630+
message: 'P99 write latency for the disk at {{ .K0sDataDir }}/etcd is {{ "{{" }} .P99 {{ "}}" }}, but it must be less than 10 ms. A higher-performance disk is required.'

0 commit comments

Comments
 (0)