@@ -28,6 +28,63 @@ func init() {
2828 Tags : []string {"kdump" , kola .SkipBaseChecksTag , kola .NeedsInternetTag },
2929 Platforms : []string {"qemu" },
3030 })
31+ register .RegisterTest (& register.Test {
32+ Run : kdumpNFSTest ,
33+ ClusterSize : 0 ,
34+ Name : `kdump.crash.nfs` ,
35+ Description : "Verifies kdump logs are exported to NFS destination" ,
36+ Tags : []string {"kdump" , kola .SkipBaseChecksTag , kola .NeedsInternetTag },
37+ Platforms : []string {"qemu" },
38+ })
39+ }
40+
41+ // This function test the remote kdump feature by:
42+ // - making sure kdump is ready
43+ // - crashing machine
44+ // - monitoring the expected vmcore path
45+ func testRemoteKdump (c cluster.TestCluster , kdump_machine platform.Machine , remote_machine platform.Machine , crash_path string ) {
46+
47+ // Wait for kdump to become active
48+ // 3 minutes should be enough to generate the kdump initramfs
49+ err := util .Retry (12 , 15 * time .Second , func () error {
50+
51+ kdump_status , err := c .SSH (kdump_machine , "systemctl is-active kdump.service" )
52+
53+ if err != nil {
54+ return err
55+ } else if string (kdump_status ) == "inactive" {
56+ return fmt .Errorf ("Kdump.service is not ready: %s." , string (kdump_status ))
57+ }
58+ return nil
59+ })
60+ if err != nil {
61+ c .Fatalf ("Timed out while waiting for kdump.service to be ready: %v" , err )
62+ }
63+
64+ // crash the kernel
65+ // use systemd-run because direclty calling `echo c > ...` will always
66+ // throw an error as the kernel immediately hangs.
67+ _ , err = c .SSH (kdump_machine , "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'" )
68+ if err != nil {
69+ c .Fatalf ("failed to queue kernel crash: %v" , err )
70+ }
71+
72+ // Wait for kdump to create vmcore dump on the remote host
73+ err = util .Retry (8 , 10 * time .Second , func () error {
74+
75+ // Look for the crash files created on the SSH machine
76+ logs , err := c .SSH (remote_machine , fmt .Sprintf ("find %s -type f -name vmcore*" , crash_path ))
77+
78+ if err != nil {
79+ return fmt .Errorf ("failed to search for vmcore: %w" , err )
80+ } else if logs == nil {
81+ return fmt .Errorf ("No vmcore created on remote host" )
82+ }
83+ return nil
84+ })
85+ if err != nil {
86+ c .Fatalf ("Timed out while waiting for kdump to create vmcore files: %v" , err )
87+ }
3188}
3289
3390// The destination VM for kdump logs
@@ -180,45 +237,121 @@ kernel_arguments:
180237 c .Fatalf ("Unable to create test machine: %v" , err )
181238 }
182239
183- // Wait for kdump to become active
184- // 3 minutes should be enough to generate the kdump initramfs
185- err = util .Retry (12 , 15 * time .Second , func () error {
240+ testRemoteKdump (c , kdump_machine , ssh_host .Machine , "/home/core/crash" )
241+ }
186242
187- kdump_status , err := c .SSH (kdump_machine , "systemctl is-active kdump.service" )
243+ // The destination VM for kdump logs over NFS
244+ type NfsServer struct {
245+ Machine platform.Machine
246+ MachineAddress string
247+ }
188248
189- if err != nil {
190- return err
191- } else if string (kdump_status ) == "inactive" {
192- return fmt .Errorf ("kdump.service is not ready: %s" , string (kdump_status ))
193- }
194- return nil
195- })
249+ func setupNFSMachine (c cluster.TestCluster ) NfsServer {
250+ var m platform.Machine
251+ var err error
252+
253+ options := platform.QemuMachineOptions {
254+ HostForwardPorts : []platform.HostForwardPort {
255+ {Service : "ssh" , HostPort : 0 , GuestPort : 22 },
256+ // Kdump NFS option does not allow a custom port
257+ {Service : "nfs" , HostPort : 2049 , GuestPort : 2049 },
258+ },
259+ }
260+
261+ nfs_server_butane := conf .Butane (`variant: fcos
262+ version: 1.5.0
263+ storage:
264+ files:
265+ - path: /etc/containers/systemd/nfs.container
266+ overwrite: true
267+ contents:
268+ inline: |
269+ [Container]
270+ Image=quay.io/coreos-assembler/nfs
271+ Volume=/var/nfs:/export
272+ Network=host
273+ PodmanArgs=--privileged
274+ [Install]
275+ WantedBy=default.target
276+ directories:
277+ - path: /var/nfs/crash` )
278+
279+ // start the machine
280+ switch c := c .Cluster .(type ) {
281+ // These cases have to be separated because when put together to the same case statement
282+ // the golang compiler no longer checks that the individual types in the case have the
283+ // NewMachineWithQemuOptions function, but rather whether platform.Cluster
284+ // does which fails
285+ case * qemu.Cluster :
286+ m , err = c .NewMachineWithQemuOptions (nfs_server_butane , options )
287+ default :
288+ panic ("unreachable" )
289+ }
196290 if err != nil {
197- c .Fatalf ( "Timed out while waiting for kdump.service to be ready: %v" , err )
291+ c .Fatal ( err )
198292 }
199293
200- // crash the kernel
201- // use systemd-run because direclty calling `echo c...` will alaways
202- // throw an error as the kernel immediately hangs.
203- _ , err = c .SSH (kdump_machine , "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'" )
294+ return NfsServer {
295+ Machine : m ,
296+ MachineAddress : "10.0.2.2" ,
297+ }
298+ }
299+
300+ func kdumpNFSTest (c cluster.TestCluster ) {
301+ nfs_host := setupNFSMachine (c )
302+
303+ butane := conf .Butane (fmt .Sprintf (`variant: fcos
304+ version: 1.5.0
305+ storage:
306+ files:
307+ - path: /etc/kdump.conf
308+ overwrite: true
309+ contents:
310+ inline: |
311+ nfs %s:/
312+ path /crash
313+ core_collector makedumpfile -l --message-level 1 -d 31
314+ extra_bins /sbin/mount.nfs
315+ extra_modules nfs nfsv3 nfs_layout_nfsv41_files blocklayoutdriver nfs_layout_flexfiles nfs_layout_nfsv41_files
316+ systemd:
317+ units:
318+ - name: kdump.service
319+ enabled: true
320+ dropins:
321+ - name: debug.conf
322+ contents: |
323+ [Service]
324+ Environment="debug=1"
325+ kernel_arguments:
326+ should_exist:
327+ - crashkernel=512M` ,
328+ nfs_host .MachineAddress ))
329+
330+ opts := platform.MachineOptions {
331+ MinMemory : 2048 ,
332+ }
333+
334+ kdump_machine , err := c .NewMachineWithOptions (butane , opts )
204335 if err != nil {
205- c .Fatalf ("failed to queue kernel crash : %v" , err )
336+ c .Fatalf ("Unable to create test machine : %v" , err )
206337 }
207338
208- // Wait for kdump to create vmcore dump on the remote host
209- err = util .Retry (5 , 10 * time .Second , func () error {
339+ // Wait for nfs server to become active
340+ // 1 minutes should be enough to pull the container image
341+ err = util .Retry (4 , 15 * time .Second , func () error {
210342
211- // Look for the crash files created on the SSH machine
212- logs , err := c .SSH (ssh_host .Machine , "find /home/core/crash -type f -name vmcore*" )
343+ nfs_status , err := c .SSH (nfs_host .Machine , "systemctl is-active nfs.service" )
213344
214345 if err != nil {
215- return fmt . Errorf ( "failed to search for vmcore: %w" , err )
216- } else if logs == nil {
217- return fmt .Errorf ("No vmcore created on remote SSH host" )
346+ return err
347+ } else if string ( nfs_status ) == "inactive" {
348+ return fmt .Errorf ("nfs.service is not ready: %s." , string ( nfs_status ) )
218349 }
219350 return nil
220351 })
221352 if err != nil {
222- c .Fatalf ("Timed out while waiting for kdump to create vmcore files : %v" , err )
353+ c .Fatalf ("Timed out while waiting for nfs.service to be ready : %v" , err )
223354 }
355+
356+ testRemoteKdump (c , kdump_machine , nfs_host .Machine , "/var/nfs/crash" )
224357}
0 commit comments