From 73e2c0698128344df0205b9f1db7dfefe3b5cda9 Mon Sep 17 00:00:00 2001
From: Rushikesh Jadhav
Date: Thu, 20 Mar 2025 17:27:00 +0530
Subject: [PATCH] Added `test_linstor_sr_fail_host` to simulate a crash of a non-master host.

- Chooses a host within a LINSTOR SR pool and simulates a crash using sysrq-trigger.
- Verifies VM boot and shutdown on all remaining hosts during the outage,
  and confirms recovery of the failed host for VM placement post-reboot.
- Ensures SR scan consistency post-recovery.

Signed-off-by: Rushikesh Jadhav
---
 tests/storage/linstor/test_linstor_sr.py | 53 ++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/tests/storage/linstor/test_linstor_sr.py b/tests/storage/linstor/test_linstor_sr.py
index 7dc6f4597..513c98565 100644
--- a/tests/storage/linstor/test_linstor_sr.py
+++ b/tests/storage/linstor/test_linstor_sr.py
@@ -131,6 +131,59 @@ def test_linstor_missing(self, linstor_sr, host):
             if not linstor_installed:
                 host.yum_install([LINSTOR_PACKAGE])
 
+    @pytest.mark.reboot
+    @pytest.mark.small_vm
+    def test_linstor_sr_fail_host(self, linstor_sr, host, vm_on_linstor_sr):
+        """
+        Crash a non-master host of the pool that holds the LINSTOR SR.
+
+        After the crash is triggered, ensure that the VM is able to boot and
+        shut down on every other host. Once the crashed host is enabled again,
+        ensure that the VM can run on it as well, then scan the SR.
+        """
+        import random
+
+        sr = linstor_sr
+        vm = vm_on_linstor_sr
+        # This test needs a pool of more than 2 hosts.
+        assert len(host.pool.hosts) > 2, "This test requires Pool to have more than 2 hosts"
+
+        # Remove master from hosts list to avoid xapi calls failure
+        hosts = list(sr.pool.hosts)
+        hosts.remove(sr.pool.master)
+        # Chosen outside of the `try` so that `random_host` is always bound afterwards.
+        random_host = random.choice(hosts)  # TBD: Choose Linstor Diskfull node
+        logging.info("Working on %s", random_host.hostname_or_ip)
+
+        # Crash the chosen host by writing `c` to /proc/sysrq-trigger.
+        try:
+            random_host.ssh(['echo', 'c', '>', '/proc/sysrq-trigger'])
+        except Exception as e:
+            # Best effort: a failure of the SSH command is tolerated here (presumably the
+            # session breaks when the host goes down). Not every exception has a `stdout`,
+            # so fall back to the exception itself instead of raising AttributeError.
+            output = getattr(e, 'stdout', e)
+            logging.info("Host %s could be crashed with output %s.", random_host.hostname_or_ip, output)
+
+        # Ensure that VM is able to start on all hosts except the failed one
+        for h in sr.pool.hosts:
+            logging.info("Checking VM on host %s", h.hostname_or_ip)
+            if h.hostname_or_ip == random_host.hostname_or_ip:
+                continue
+            vm.start(on=h.uuid)
+            vm.wait_for_os_booted()
+            vm.shutdown(verify=True)
+
+        # Wait for random_host to come back online
+        wait_for(random_host.is_enabled, "Wait for crashed host enabled", timeout_secs=30 * 60)
+
+        # Ensure that the VM is able to run on crashed host as well.
+        vm.start(on=random_host.uuid)
+        vm.wait_for_os_booted()
+        vm.shutdown(verify=True)
+
+        sr.scan()
+
     # *** End of tests with reboots
 
     # --- Test diskless resources --------------------------------------------------