Skip to content

Commit 7b39d7d

Browse files
committed
WIP install 8/n: run installer, go see inside and detect success
FIXME: (1) the specification of the answerfile contents and of the dom0 cmdline are tightly linked, e.g. we rely on atexit=shell; (2) Pool construction on firstboot is problematic; (3) repeatedly polling with grep is junk.
1 parent f37f9fd commit 7b39d7d

File tree

3 files changed

+165
-2
lines changed

3 files changed

+165
-2
lines changed

data.py-dist

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
HOST_DEFAULT_USER = "root"
77
HOST_DEFAULT_PASSWORD = ""
88

9+
# Public key for a private key available to the test runner
10+
TEST_SSH_PUBKEY = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDKz9uQOoxq6Q0SQ0XTzQHhDolvuo/7EyrDZsYQbRELhcPJG8MT/o5u3HyJFhIP2+HqBSXXgmqRPJUkwz9wUwb2sUwf44qZm/pyPUWOoxyVtrDXzokU/uiaNKUMhbnfaXMz6Ogovtjua63qld2+ZRXnIgrVtYKtYBeu/qKGVSnf4FTOUKl1w3uKkr59IUwwAO8ay3wVnxXIHI/iJgq6JBgQNHbn3C/SpYU++nqL9G7dMyqGD36QPFuqH/cayL8TjNZ67TgAzsPX8OvmRSqjrv3KFbeSlpS/R4enHkSemhgfc8Z2f49tE7qxWZ6x4Uyp5E6ur37FsRf/tEtKIUJGMRXN XCP-ng CI"
11+
912
# The following prefix will be added to the `name-label` parameter of XAPI objects
1013
# that the tests will create or import, such as VMs and SRs.
1114
# Default value: [your login/user]

tests/install/conftest.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def iso_remaster(request, answerfile):
1212
iso_key = marker.args[0]
1313
ANSWERFILE_URL = "http://pxe/configs/custom/ydi/install-8.2-uefi-iso-ext.xml" # FIXME
1414

15-
from data import ISO_IMAGES, ISOSR_SRV, ISOSR_PATH, TOOLS
15+
from data import ISO_IMAGES, ISOSR_SRV, ISOSR_PATH, TEST_SSH_PUBKEY, TOOLS
1616
assert "iso-remaster" in TOOLS
1717
iso_remaster = TOOLS["iso-remaster"]
1818
assert os.access(iso_remaster, os.X_OK)
@@ -23,6 +23,7 @@ def iso_remaster(request, answerfile):
2323
with tempfile.TemporaryDirectory() as isotmp:
2424
remastered_iso = os.path.join(isotmp, "image.iso")
2525
iso_patcher_script = os.path.join(isotmp, "iso-patcher")
26+
img_patcher_script = os.path.join(isotmp, "img-patcher")
2627

2728
logging.info("Remastering %s to %s", SOURCE_ISO, remastered_iso)
2829

@@ -32,14 +33,17 @@ def iso_remaster(request, answerfile):
3233
set -ex
3334
INSTALLIMG="$1"
3435
36+
mkdir -p "$INSTALLIMG/root/.ssh"
37+
echo "{TEST_SSH_PUBKEY}" > "$INSTALLIMG/root/.ssh/authorized_keys"
38+
3539
curl {ANSWERFILE_URL} -o "$INSTALLIMG/root/answerfile.xml"
3640
""",
3741
file=patcher_fd)
3842
os.chmod(patcher_fd.fileno(), 0o755)
3943

4044
# generate iso-patcher script
4145
with open(iso_patcher_script, "xt") as patcher_fd:
42-
passwd = "passw0rd" # FIXME hash
46+
passwd = "passw0rd" # FIXME use invalid hash
4347
print(f"""#!/bin/bash
4448
set -ex
4549
ISODIR="$1"
@@ -55,6 +59,7 @@ def iso_remaster(request, answerfile):
5559

5660
# do remaster
5761
local_cmd([iso_remaster,
62+
"--install-patcher", img_patcher_script,
5863
"--iso-patcher", iso_patcher_script,
5964
SOURCE_ISO, remastered_iso
6065
])

tests/install/test.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
import logging
2+
import os
23
import pytest
4+
import time
5+
6+
from lib import commands, pxe
7+
from lib.common import wait_for
8+
from lib.host import Host
9+
from lib.pool import Pool
310

411
class TestNested:
512
@pytest.mark.vm_definitions(
@@ -25,4 +32,152 @@ def test_install_821_uefi(self, iso_remaster, create_vms):
2532
# FIXME should be part of vm def
2633
host_vm.create_cd_vbd(device="xvdd", userdevice="3")
2734

35+
vif = host_vm.vifs()[0]
36+
mac_address = vif.param_get('MAC')
37+
logging.info("Host VM has MAC %s", mac_address)
38+
2839
host_vm.insert_cd(iso_remaster)
40+
41+
try:
42+
host_vm.start()
43+
wait_for(host_vm.is_running, "Wait for host VM running")
44+
45+
# catch host-vm IP address
46+
wait_for(lambda: pxe.arp_addresses_for(mac_address),
47+
"Wait for DHCP server to see Host VM in ARP tables",
48+
timeout_secs=10*60)
49+
ips = pxe.arp_addresses_for(mac_address)
50+
logging.info("Host VM has IPs %s", ips)
51+
assert len(ips) == 1
52+
host_vm.ip = ips[0]
53+
54+
host_vm.ssh(["ls"])
55+
logging.debug("ssh works")
56+
57+
# wait for "yum install" phase to finish
58+
wait_for(lambda: host_vm.ssh(["grep",
59+
"'DISPATCH: NEW PHASE: Completing installation'",
60+
"/tmp/install-log"],
61+
check=False, simple_output=False,
62+
).returncode == 0,
63+
"Wait for rpm installation to succeed",
64+
timeout_secs=40*60) # FIXME too big
65+
66+
# wait for install to finish
67+
wait_for(lambda: host_vm.ssh(["grep",
68+
"'The installation completed successfully'",
69+
"/tmp/install-log"],
70+
check=False, simple_output=False,
71+
).returncode == 0,
72+
"Wait for system installation to succeed",
73+
timeout_secs=40*60) # FIXME too big
74+
75+
wait_for(lambda: host_vm.ssh(["ps a|grep '[0-9]. python /opt/xensource/installer/init'"],
76+
check=False, simple_output=False,
77+
).returncode == 1,
78+
"Wait for installer to terminate")
79+
80+
# powercycle, catch any change of IP
81+
logging.info("Shutting down Host VM after successful installation")
82+
try:
83+
# use "poweroff" because "reboot" would cause ARP and
84+
# SSH to be checked before host is down, and require
85+
# ssh retries
86+
host_vm.ssh(["poweroff"])
87+
except commands.SSHCommandFailed as e:
88+
# ignore connection closed by the poweroff
89+
if e.returncode == 255 and "closed by remote host" in e.stdout:
90+
logging.info("sshd closed the connection")
91+
pass
92+
else:
93+
raise
94+
wait_for(host_vm.is_halted, "Wait for host VM halted")
95+
host_vm.eject_cd()
96+
97+
# FIXME: make a snapshot here
98+
99+
# FIXME: evict MAC from ARP cache first?
100+
host_vm.start()
101+
wait_for(host_vm.is_running, "Wait for host VM running")
102+
103+
ips = pxe.arp_addresses_for(mac_address)
104+
logging.info("Host VM has IPs %s", ips)
105+
assert len(ips) == 1
106+
host_vm.ip = ips[0]
107+
108+
wait_for(lambda: not os.system(f"nc -zw5 {host_vm.ip} 22"),
109+
"Wait for ssh back up on Host VM", retry_delay_secs=5)
110+
111+
# FIXME "xe host-list" in there can fail with various
112+
# errors until the XAPI DB is initialized enough for
113+
# Pool.__init__(), which assumes the master XAPI is up and
114+
# running.
115+
116+
# pool master must be reachable here
117+
# FIXME: not sure why we seem to need this, while port 22 has been seen open
118+
tries = 5
119+
while True:
120+
try:
121+
pool = Pool(host_vm.ip)
122+
except commands.SSHCommandFailed as e:
123+
if "Connection refused" not in e.stdout:
124+
raise
125+
tries -= 1
126+
if tries:
127+
logging.warning("retrying connection to pool master")
128+
time.sleep(2)
129+
continue
130+
# retries failed
131+
raise
132+
# it worked!
133+
break
134+
135+
# wait for XAPI
136+
wait_for(pool.master.is_enabled, "Wait for XAPI to be ready", timeout_secs=30 * 60)
137+
138+
# check for firstboot issues
139+
# FIXME: flaky, must check logs extraction on failure
140+
for service in ["control-domain-params-init",
141+
"network-init",
142+
"storage-init",
143+
"generate-iscsi-iqn",
144+
"create-guest-templates",
145+
]:
146+
try:
147+
wait_for(lambda: pool.master.ssh(["test", "-e", f"/var/lib/misc/ran-{service}"],
148+
check=False, simple_output=False,
149+
).returncode == 0,
150+
f"Wait for ran-{service} stamp")
151+
except TimeoutError:
152+
# Pass the service name as a lazy %-style argument so it is actually interpolated
logging.warning("investigating lack of ran-%s stamp", service)
153+
out = pool.master.ssh(["systemctl", "status", service], check=False)
154+
logging.warning("service status: %s", out)
155+
out = pool.master.ssh(["grep", "-r", service, "/var/log"], check=False)
156+
logging.warning("in logs: %s", out)
157+
158+
logging.info("Powering off pool master")
159+
try:
160+
# use "poweroff" because "reboot" would cause ARP and
161+
# SSH to be checked before host is down, and require
162+
# ssh retries
163+
pool.master.ssh(["poweroff"])
164+
except commands.SSHCommandFailed as e:
165+
# ignore connection closed by the poweroff
166+
if e.returncode == 255 and "closed by remote host" in e.stdout:
167+
logging.info("sshd closed the connection")
168+
pass
169+
else:
170+
raise
171+
172+
wait_for(host_vm.is_halted, "Wait for host VM halted")
173+
174+
except Exception as e:
175+
logging.critical("caught exception %s", e)
176+
# wait_for(lambda: False, 'Wait "forever"', timeout_secs=100*60)
177+
host_vm.shutdown(force=True)
178+
raise
179+
except KeyboardInterrupt:
180+
logging.warning("keyboard interrupt")
181+
# wait_for(lambda: False, 'Wait "forever"', timeout_secs=100*60)
182+
host_vm.shutdown(force=True)
183+
raise

0 commit comments

Comments
 (0)