Skip to content

Commit 33280e1

Browse files
Handle the scenario where VM.start may get stuck when both the VM.start and an InUse xcp-persistent-database are on the host with the failing disk.
The state can be recovered by bringing the failed device back online; however, reaching this state means that the test has failed. Signed-off-by: Rushikesh Jadhav <[email protected]>
1 parent b0d89c5 commit 33280e1

File tree

1 file changed

+17
-4
lines changed

1 file changed

+17
-4
lines changed

tests/storage/linstor/test_linstor_sr.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def test_linstor_sr_fail_disk(self, linstor_sr, vm_on_linstor_sr, provisioning_t
138138
Identify random host within the same pool, detect used disks, fail one, and test VM useability on LINSTOR SR.
139139
"""
140140
import random
141+
import multiprocessing
141142

142143
sr = linstor_sr
143144
if provisioning_type == "thick":
@@ -162,10 +163,22 @@ def test_linstor_sr_fail_disk(self, linstor_sr, vm_on_linstor_sr, provisioning_t
162163

163164
# Ensure that VM is able to start on all hosts despite Linstor pool disk failure
164165
for h in sr.pool.hosts:
165-
logging.info(f"Checking VM on host {h.hostname_or_ip}")
166-
vm.start(on=h.uuid)
167-
vm.wait_for_os_booted()
168-
vm.shutdown(verify=True)
166+
logging.info("Checking VM on host %s", h.hostname_or_ip)
167+
try:
168+
proc = multiprocessing.Process(target=vm.start, kwargs={'on': h.uuid})
169+
proc.start()
170+
proc.join(timeout=30)
171+
if proc.is_alive():
172+
proc.terminate()
173+
proc.join()
174+
logging.warning("VM start on host %s timed out. Recovering failed disk.", h.hostname_or_ip)
175+
random_host.ssh(['echo', '"running"', '>', f'/sys/block/{fail_device}/device/state'])
176+
pytest.fail("VM start timed out on host %s after 30s. Disk recovered.", h.hostname_or_ip)
177+
else: # VM booted fine
178+
vm.wait_for_os_booted()
179+
vm.shutdown(verify=True)
180+
except Exception as e:
181+
logging.info("Caught exception in multiprocessing: %s", e)
169182

170183
random_host.reboot(verify=True)
171184

0 commit comments

Comments
 (0)