Skip to content

Commit 52d2a71

Browse files
Handle the scenario where VM.start may get stuck if both VM.start and the InUse xcp-persistent-database are on the host with the failing disk.
The state can be recovered by bringing the failed device back online; however, it means that the test failed. Signed-off-by: Rushikesh Jadhav <[email protected]>
1 parent b0d89c5 commit 52d2a71

File tree

1 file changed

+21
-5
lines changed

1 file changed

+21
-5
lines changed

tests/storage/linstor/test_linstor_sr.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def test_linstor_sr_fail_disk(self, linstor_sr, vm_on_linstor_sr, provisioning_t
138138
Identify random host within the same pool, detect used disks, fail one, and test VM useability on LINSTOR SR.
139139
"""
140140
import random
141+
import multiprocessing
141142

142143
sr = linstor_sr
143144
if provisioning_type == "thick":
@@ -158,14 +159,29 @@ def test_linstor_sr_fail_disk(self, linstor_sr, vm_on_linstor_sr, provisioning_t
158159
except Exception as e:
159160
# Offline disk shall connect back after host reboot. Teardown normally.
160161
random_host.reboot(verify=True)
161-
pytest.fail("Failed to simulate device failure. Error %s", e.stdout)
162+
pytest.fail("Failed to simulate device failure. Error %s", e)
162163

163164
# Ensure that VM is able to start on all hosts despite Linstor pool disk failure
164165
for h in sr.pool.hosts:
165-
logging.info(f"Checking VM on host {h.hostname_or_ip}")
166-
vm.start(on=h.uuid)
167-
vm.wait_for_os_booted()
168-
vm.shutdown(verify=True)
166+
logging.info("Checking VM on host %s", h.hostname_or_ip)
167+
try:
168+
proc = multiprocessing.Process(target=vm.start, kwargs={'on': h.uuid})
169+
proc.start()
170+
proc.join(timeout=30)
171+
if proc.is_alive():
172+
proc.terminate()
173+
proc.join()
174+
logging.warning("VM start on host %s timed out. Recovering failed disk.", h.hostname_or_ip)
175+
random_host.ssh(['echo', '"running"', '>', f'/sys/block/{fail_device}/device/state'])
176+
# Handle the case where VM.start succeeds after the disk comes back online
177+
if vm.is_running():
178+
vm.shutdown(verify=True, force_if_fails=True)
179+
pytest.fail("VM start timed out on host %s after 30s. Disk recovered.", h.hostname_or_ip)
180+
else: # VM booted fine
181+
vm.wait_for_os_booted()
182+
vm.shutdown(verify=True)
183+
except Exception as e:
184+
logging.info("Caught exception in multiprocessing: %s", e)
169185

170186
random_host.reboot(verify=True)
171187

0 commit comments

Comments
 (0)