Skip to content

Commit b8958ed

Browse files
jbtrystram authored and jlebon committed
tests: add a remote kdump test
This test sets up two machines to check that kdump successfully exports a vmcore to an SSH destination. It uses a fairly large timeout for kdump to become active, because generating the initramfs can take a long time on slower systems. Fixes coreos/fedora-coreos-tracker#1753
1 parent b241f8b commit b8958ed

File tree

1 file changed

+219
-0
lines changed

1 file changed

+219
-0
lines changed

mantle/kola/tests/ignition/kdump.go

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
package ignition
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"strings"
7+
"time"
8+
9+
"github.com/coreos/coreos-assembler/mantle/kola"
10+
"github.com/coreos/coreos-assembler/mantle/kola/cluster"
11+
"github.com/coreos/coreos-assembler/mantle/kola/register"
12+
"github.com/coreos/coreos-assembler/mantle/platform"
13+
"github.com/coreos/coreos-assembler/mantle/platform/conf"
14+
"github.com/coreos/coreos-assembler/mantle/platform/machine/qemu"
15+
"github.com/coreos/coreos-assembler/mantle/util"
16+
)
17+
18+
// Test kdump to remote hosts
19+
20+
func init() {
21+
// Create 0 cluster size to allow starting and setup ssh server as needed for the test
22+
// See: https://github.com/coreos/coreos-assembler/pull/1310#discussion_r401908836
23+
register.RegisterTest(&register.Test{
24+
Run: kdumpSSHTest,
25+
ClusterSize: 0,
26+
Name: `kdump.crash.ssh`,
27+
Description: "Verifies kdump logs are exported to SSH destination",
28+
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
29+
Platforms: []string{"qemu"},
30+
})
31+
}
32+
33+
// The destination VM for kdump logs
34+
type SshServer struct {
35+
Machine platform.Machine
36+
MachineAddress string
37+
SSHPort string
38+
PrivSSH string
39+
PubSSH string
40+
}
41+
42+
// Start a VM and return the SSH key pair.
43+
func setupSSHMachine(c cluster.TestCluster) SshServer {
44+
var m platform.Machine
45+
var err error
46+
var address string
47+
var port string
48+
49+
options := platform.QemuMachineOptions{
50+
HostForwardPorts: []platform.HostForwardPort{
51+
{Service: "ssh", HostPort: 0, GuestPort: 22},
52+
},
53+
}
54+
55+
// temp dir to store SSH keys
56+
tmpd, err := os.MkdirTemp("", "kola-kdump-crash-ssh")
57+
if err != nil {
58+
c.Fatalf("Error creating tempdir: %v", err)
59+
}
60+
defer os.RemoveAll(tmpd)
61+
62+
// generate an ssh key pair we'll use for authentication
63+
pubkeyBuf, privkeyPath, err := util.CreateSSHAuthorizedKey(tmpd)
64+
if err != nil {
65+
c.Fatalf("Error creating ssh keys: %v", err)
66+
}
67+
68+
// load the private key as well
69+
privKeyBuf, err := os.ReadFile(privkeyPath)
70+
if err != nil {
71+
c.Fatalf("error reading pubkey: %v", err)
72+
}
73+
74+
// Inject the public key previously created as an
75+
// authorized key
76+
ignition := conf.Ignition(fmt.Sprintf(`{
77+
"ignition": { "version": "3.4.0" },
78+
"passwd":{
79+
"users":[
80+
{
81+
"name":"core",
82+
"sshAuthorizedKeys": ["%s"]
83+
}
84+
]
85+
}
86+
}`, strings.TrimSpace(string(pubkeyBuf))))
87+
88+
// start the machine
89+
switch c := c.Cluster.(type) {
90+
// These cases have to be separated because when put together to the same case statement
91+
// the golang compiler no longer checks that the individual types in the case have the
92+
// NewMachineWithQemuOptions function, but rather whether platform.Cluster
93+
// does which fails
94+
case *qemu.Cluster:
95+
m, err = c.NewMachineWithQemuOptions(ignition, options)
96+
default:
97+
panic("unreachable")
98+
}
99+
if err != nil {
100+
c.Fatal(err)
101+
}
102+
103+
// get the ssh port
104+
for _, hfp := range options.HostForwardPorts {
105+
if hfp.Service == "ssh" {
106+
address = "10.0.2.2"
107+
port = fmt.Sprintf("%d", hfp.HostPort)
108+
}
109+
}
110+
111+
return SshServer{
112+
Machine: m,
113+
MachineAddress: address,
114+
SSHPort: port,
115+
PubSSH: string(pubkeyBuf),
116+
PrivSSH: string(privKeyBuf),
117+
}
118+
}
119+
120+
func kdumpSSHTest(c cluster.TestCluster) {
121+
ssh_host := setupSSHMachine(c)
122+
123+
// insert indentation in front SSH prviate key lines
124+
// to avoid errors in the butane file
125+
var padded = ""
126+
for _, line := range strings.Split(strings.TrimSuffix(ssh_host.PrivSSH, "\n"), "\n") {
127+
padded = fmt.Sprintf("%s %s\n", padded, line)
128+
}
129+
130+
butane := conf.Butane(fmt.Sprintf(`variant: fcos
131+
version: 1.5.0
132+
storage:
133+
files:
134+
- path: /root/.ssh/id_ssh_kdump.pub
135+
mode: 0600
136+
contents:
137+
inline: |
138+
%s
139+
- path: /root/.ssh/id_ssh_kdump
140+
mode: 0600
141+
contents:
142+
inline: |
143+
%s
144+
- path: /root/.ssh/config
145+
mode: 0644
146+
overwrite: true
147+
contents:
148+
inline: |
149+
Host %s
150+
StrictHostKeyChecking no
151+
Port %s
152+
- path: /etc/kdump.conf
153+
overwrite: true
154+
contents:
155+
inline: |
156+
ssh core@%s
157+
sshkey /root/.ssh/id_ssh_kdump
158+
path /home/core/crash
159+
core_collector makedumpfile -F -l --message-level 1 -d 31
160+
systemd:
161+
units:
162+
- name: kdump.service
163+
enabled: true
164+
kernel_arguments:
165+
should_exist:
166+
- crashkernel=512M`,
167+
ssh_host.PubSSH, padded, ssh_host.MachineAddress, ssh_host.SSHPort, ssh_host.MachineAddress))
168+
169+
opts := platform.MachineOptions{
170+
MinMemory: 2048,
171+
}
172+
173+
kdump_machine, err := c.NewMachineWithOptions(butane, opts)
174+
if err != nil {
175+
c.Fatalf("Unable to create test machine: %v", err)
176+
}
177+
178+
// Wait for kdump to become active
179+
// 3 minutes should be enough to generate the kdump initramfs
180+
err = util.Retry(12, 15*time.Second, func() error {
181+
182+
kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")
183+
184+
if err != nil {
185+
return err
186+
} else if string(kdump_status) == "inactive" {
187+
return fmt.Errorf(fmt.Sprintf("Kdump.service is not ready: %s.", string(kdump_status)))
188+
}
189+
return nil
190+
})
191+
if err != nil {
192+
c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
193+
}
194+
195+
// crash the kernel
196+
// use systemd-run because direclty calling `echo c...` will alaways
197+
// throw an error as the kernel immediately hangs.
198+
_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
199+
if err != nil {
200+
c.Fatalf("failed to queue kernel crash: %v", err)
201+
}
202+
203+
// Wait for kdump to create vmcore dump on the remote host
204+
err = util.Retry(5, 10*time.Second, func() error {
205+
206+
// Look for the crash files created on the SSH machine
207+
logs, err := c.SSH(ssh_host.Machine, "find /home/core/crash -type f -name vmcore*")
208+
209+
if err != nil {
210+
return fmt.Errorf("failed to search for vmcore: %w", err)
211+
} else if logs == nil {
212+
return fmt.Errorf("No vmcore created on remote SSH host")
213+
}
214+
return nil
215+
})
216+
if err != nil {
217+
c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
218+
}
219+
}

0 commit comments

Comments
 (0)