Skip to content

Commit af1468c

Browse files
committed
Revert "tests/ignition/kdump: add a remote NFS kdump test"
This reverts commit b10d8dc. The test passes on F40 but fails on F41+ [1] and on RHCOS, so let's yank it for now and re-apply it once it's confirmed to pass everywhere. [1] coreos/fedora-coreos-tracker#1820
1 parent 7653b93 commit af1468c

File tree

1 file changed

+25
-159
lines changed

1 file changed

+25
-159
lines changed

mantle/kola/tests/ignition/kdump.go

Lines changed: 25 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -28,63 +28,6 @@ func init() {
2828
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
2929
Platforms: []string{"qemu"},
3030
})
31-
register.RegisterTest(&register.Test{
32-
Run: kdumpNFSTest,
33-
ClusterSize: 0,
34-
Name: `kdump.crash.nfs`,
35-
Description: "Verifies kdump logs are exported to NFS destination",
36-
Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag},
37-
Platforms: []string{"qemu"},
38-
})
39-
}
40-
41-
// This function tests the remote kdump feature by:
42-
// - making sure kdump is ready
43-
// - crashing machine
44-
// - monitoring the expected vmcore path
45-
func testRemoteKdump(c cluster.TestCluster, kdump_machine platform.Machine, remote_machine platform.Machine, crash_path string) {
46-
47-
// Wait for kdump to become active
48-
// 3 minutes should be enough to generate the kdump initramfs
49-
err := util.Retry(12, 15*time.Second, func() error {
50-
51-
kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")
52-
53-
if err != nil {
54-
return err
55-
} else if string(kdump_status) == "inactive" {
56-
return fmt.Errorf("Kdump.service is not ready: %s.", string(kdump_status))
57-
}
58-
return nil
59-
})
60-
if err != nil {
61-
c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
62-
}
63-
64-
// crash the kernel
65-
// use systemd-run because directly calling `echo c > ...` will always
66-
// throw an error as the kernel immediately hangs.
67-
_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
68-
if err != nil {
69-
c.Fatalf("failed to queue kernel crash: %v", err)
70-
}
71-
72-
// Wait for kdump to create vmcore dump on the remote host
73-
err = util.Retry(5, 10*time.Second, func() error {
74-
75-
// Look for the crash files created on the SSH machine
76-
logs, err := c.SSH(remote_machine, fmt.Sprintf("find %s -type f -name vmcore*", crash_path))
77-
78-
if err != nil {
79-
return fmt.Errorf("failed to search for vmcore: %w", err)
80-
} else if logs == nil {
81-
return fmt.Errorf("No vmcore created on remote host")
82-
}
83-
return nil
84-
})
85-
if err != nil {
86-
c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
87-
}
8831
}
8932

9033
// The destination VM for kdump logs
@@ -237,122 +180,45 @@ kernel_arguments:
237180
c.Fatalf("Unable to create test machine: %v", err)
238181
}
239182

240-
testRemoteKdump(c, kdump_machine, ssh_host.Machine, "/home/core/crash")
241-
}
242-
243-
// The destination VM for kdump logs over NFS
244-
type NfsServer struct {
245-
Machine platform.Machine
246-
MachineAddress string
247-
}
248-
249-
func setupNFSMachine(c cluster.TestCluster) NfsServer {
250-
var m platform.Machine
251-
var err error
252-
253-
options := platform.QemuMachineOptions{
254-
HostForwardPorts: []platform.HostForwardPort{
255-
{Service: "ssh", HostPort: 0, GuestPort: 22},
256-
// Kdump NFS option does not allow a custom port
257-
{Service: "nfs", HostPort: 2049, GuestPort: 2049},
258-
},
259-
}
183+
// Wait for kdump to become active
184+
// 3 minutes should be enough to generate the kdump initramfs
185+
err = util.Retry(12, 15*time.Second, func() error {
260186

261-
nfs_server_butane := conf.Butane(`variant: fcos
262-
version: 1.5.0
263-
storage:
264-
files:
265-
- path: /etc/containers/systemd/nfs.container
266-
overwrite: true
267-
contents:
268-
inline: |
269-
[Container]
270-
Image=quay.io/openshifttest/nfs-server
271-
Volume=/var/nfs:/mnt/data
272-
PublishPort=2049:2049
273-
PodmanArgs=--privileged
274-
[Install]
275-
WantedBy=default.target
276-
directories:
277-
- path: /var/nfs/crash`)
187+
kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service")
278188

279-
// start the machine
280-
switch c := c.Cluster.(type) {
281-
// These cases have to be separated because when put together to the same case statement
282-
// the golang compiler no longer checks that the individual types in the case have the
283-
// NewMachineWithQemuOptions function, but rather whether platform.Cluster
284-
// does, which fails
285-
case *qemu.Cluster:
286-
m, err = c.NewMachineWithQemuOptions(nfs_server_butane, options)
287-
default:
288-
panic("unreachable")
289-
}
189+
if err != nil {
190+
return err
191+
} else if string(kdump_status) == "inactive" {
192+
return fmt.Errorf(fmt.Sprintf("Kdump.service is not ready: %s.", string(kdump_status)))
193+
}
194+
return nil
195+
})
290196
if err != nil {
291-
c.Fatal(err)
292-
}
293-
294-
return NfsServer{
295-
Machine: m,
296-
MachineAddress: "10.0.2.2",
297-
}
298-
}
299-
300-
func kdumpNFSTest(c cluster.TestCluster) {
301-
nfs_host := setupNFSMachine(c)
302-
303-
butane := conf.Butane(fmt.Sprintf(`variant: fcos
304-
version: 1.5.0
305-
storage:
306-
files:
307-
- path: /etc/kdump.conf
308-
overwrite: true
309-
contents:
310-
inline: |
311-
nfs %s:/
312-
path /crash
313-
core_collector makedumpfile -l --message-level 1 -d 31
314-
extra_bins /sbin/mount.nfs
315-
extra_modules nfs nfsv3 nfs_layout_nfsv41_files blocklayoutdriver nfs_layout_flexfiles nfs_layout_nfsv41_files
316-
systemd:
317-
units:
318-
- name: kdump.service
319-
enabled: true
320-
dropins:
321-
- name: debug.conf
322-
contents: |
323-
[Service]
324-
Environment="debug=1"
325-
kernel_arguments:
326-
should_exist:
327-
- crashkernel=512M`,
328-
nfs_host.MachineAddress))
329-
330-
opts := platform.MachineOptions{
331-
MinMemory: 2048,
197+
c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err)
332198
}
333199

334-
kdump_machine, err := c.NewMachineWithOptions(butane, opts)
200+
// crash the kernel
201+
// use systemd-run because directly calling `echo c...` will always
202+
// throw an error as the kernel immediately hangs.
203+
_, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'")
335204
if err != nil {
336-
c.Fatalf("Unable to create test machine: %v", err)
205+
c.Fatalf("failed to queue kernel crash: %v", err)
337206
}
338207

339-
// XXX Refactor this
340-
// Wait for nfs server to become active
341-
// 1 minute should be enough to pull the container image
342-
err = util.Retry(4, 15*time.Second, func() error {
208+
// Wait for kdump to create vmcore dump on the remote host
209+
err = util.Retry(5, 10*time.Second, func() error {
343210

344-
nfs_status, err := c.SSH(nfs_host.Machine, "systemctl is-active nfs.service")
211+
// Look for the crash files created on the SSH machine
212+
logs, err := c.SSH(ssh_host.Machine, "find /home/core/crash -type f -name vmcore*")
345213

346214
if err != nil {
347-
return err
348-
} else if string(nfs_status) == "inactive" {
349-
return fmt.Errorf("nfs.service is not ready: %s.", string(nfs_status))
215+
return fmt.Errorf("failed to search for vmcore: %w", err)
216+
} else if logs == nil {
217+
return fmt.Errorf("No vmcore created on remote SSH host")
350218
}
351219
return nil
352220
})
353221
if err != nil {
354-
c.Fatalf("Timed out while waiting for nfs.service to be ready: %v", err)
222+
c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err)
355223
}
356-
357-
testRemoteKdump(c, kdump_machine, nfs_host.Machine, "/var/nfs/crash")
358224
}

0 commit comments

Comments
 (0)