Skip to content

Commit b0d89c5

Browse files
Added test_linstor_sr_fail_disk, which:
- Simulates the failure of an LVM PV on a random host in the LINSTOR SR pool by offlining a selected disk.
- Verifies VM start/shutdown on all hosts despite the degraded pool state.
- Ensures the SR and its PBDs recover after a reboot of the affected host.

Signed-off-by: Rushikesh Jadhav <[email protected]>
1 parent 4fbe63e commit b0d89c5

File tree

1 file changed

+46
-1
lines changed

1 file changed

+46
-1
lines changed

tests/storage/linstor/test_linstor_sr.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pytest
33
import time
44

5-
from .conftest import LINSTOR_PACKAGE
5+
from .conftest import GROUP_NAME, LINSTOR_PACKAGE
66
from lib.commands import SSHCommandFailed
77
from lib.common import wait_for, vm_image
88
from tests.storage import vdi_is_open
@@ -131,6 +131,51 @@ def test_linstor_missing(self, linstor_sr, host):
131131
if not linstor_installed:
132132
host.yum_install([LINSTOR_PACKAGE])
133133

134+
@pytest.mark.reboot
@pytest.mark.small_vm
def test_linstor_sr_fail_disk(self, linstor_sr, vm_on_linstor_sr, provisioning_type):
    """
    Fail one disk backing the LINSTOR SR on a random pool host and check VM usability.

    Steps:
    - pick a random host of the SR's pool and list the physical volumes of the
      LINSTOR volume group (GROUP_NAME) on it;
    - set the last listed device offline through sysfs;
    - start, wait for boot and shut down the VM on every host of the pool;
    - reboot the affected host, re-plug the PBDs if needed and scan the SR.

    The affected host is rebooted on every exit path after the disk failure was
    attempted, since the offlined disk is expected to come back after a reboot.
    """
    import random

    sr = linstor_sr
    if provisioning_type == "thick":
        time.sleep(45)  # Let xcp-persistent-database come in sync across the nodes

    vm = vm_on_linstor_sr

    # Pick the host outside of the try block so that the recovery path below
    # can always refer to it.
    random_host = random.choice(sr.pool.hosts)  # TBD: Choose Linstor Diskfull node
    logging.info("Working on %s", random_host.hostname_or_ip)

    # Fail a disk from the chosen host of the Linstor pool
    try:
        pv_listing = random_host.ssh('vgs ' + GROUP_NAME + ' -o pv_name --no-headings')
        devices = [line.strip() for line in pv_listing.split("\n") if line.strip()]
        if not devices:
            # Without this guard an empty name would be handed to lsblk and then
            # used to build the sysfs path.
            raise RuntimeError("No physical volume found in volume group " + GROUP_NAME)
        # Choosing the last device from the list, assuming it is the least filled
        fail_device = devices[-1]  # /dev/sdb
        fail_device = random_host.ssh(
            ['lsblk', fail_device, '--nodeps --output NAME --noheadings']
        ).strip()  # sdb
        logging.info("Attempting to fail device: %s", fail_device)
        random_host.ssh(['echo', '"offline"', '>', '/sys/block/' + fail_device + '/device/state'])
    except Exception as e:
        # Offline disk shall connect back after host reboot. Teardown normally.
        random_host.reboot(verify=True)
        # pytest.fail() does not interpolate extra arguments (its second positional
        # parameter is `pytrace`), so the message is built here. Not every exception
        # carries a `stdout` attribute, hence the fallback on the exception itself.
        pytest.fail("Failed to simulate device failure. Error: %s" % getattr(e, "stdout", e))

    try:
        # Ensure that VM is able to start on all hosts despite Linstor pool disk failure
        for h in sr.pool.hosts:
            logging.info("Checking VM on host %s", h.hostname_or_ip)
            vm.start(on=h.uuid)
            vm.wait_for_os_booted()
            vm.shutdown(verify=True)
    finally:
        # Bring the failed disk back even when a VM check fails, so that the
        # fixtures do not tear down on a degraded pool.
        random_host.reboot(verify=True)

    # Ensure PBDs are attached post reboot
    if not sr.all_pbds_attached():
        sr.plug_pbds()

    # Ensure SR scan works and proceed for teardown
    sr.scan()
178+
134179
# *** End of tests with reboots
135180

136181
# --- Test diskless resources --------------------------------------------------

0 commit comments

Comments
 (0)