Skip to content

Commit 1955c1f

Browse files
Andres D. Molins authored and aliel committed
Fix: Added qemu communication channel options to allow VM to receive commands
1 parent 00e5943 commit 1955c1f

5 files changed

Lines changed: 195 additions & 86 deletions

File tree

src/aleph/vm/controllers/configuration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class QemuVMConfiguration(BaseModel):
3434
image_path: str
3535
monitor_socket_path: Path
3636
qmp_socket_path: Path
37+
qga_socket_path: Path
3738
vcpu_count: int
3839
mem_size_mb: int
3940
interface_name: str | None = None
@@ -48,6 +49,7 @@ class QemuConfidentialVMConfiguration(BaseModel):
4849
image_path: str
4950
monitor_socket_path: Path
5051
qmp_socket_path: Path
52+
qga_socket_path: Path
5153
vcpu_count: int
5254
mem_size_mb: int
5355
interface_name: str | None = None

src/aleph/vm/controllers/qemu/client.py

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import base64
12
import time
23
from enum import Enum
34

45
import qmp
6+
import yaml
57
from pydantic import BaseModel
68

79

@@ -74,11 +76,19 @@ def __init__(self, vm):
7476
if not (vm.qmp_socket_path and vm.qmp_socket_path.exists()):
7577
msg = "VM is not running"
7678
raise Exception(msg)
77-
client = qmp.QEMUMonitorProtocol(str(vm.qmp_socket_path))
78-
client.connect()
7979

80-
# qmp_client = qmp.QEMUMonitorProtocol(address=("localhost", vm.qmp_port))
81-
self.qmp_client = client
80+
qmp_client = qmp.QEMUMonitorProtocol(str(vm.qmp_socket_path))
81+
qmp_client.connect()
82+
self.qmp_client = qmp_client
83+
84+
# QGA (QEMU Guest Agent) uses a separate communication channel over virtio-serial.
85+
# The wire protocol is JSON-based and compatible with the QMP library.
86+
if vm.qga_socket_path and vm.qga_socket_path.exists():
87+
qga_client = qmp.QEMUMonitorProtocol(str(vm.qga_socket_path))
88+
qga_client.connect()
89+
self.qga_client = qga_client
90+
else:
91+
self.qga_client = None
8292

8393
def __enter__(self):
8494
return self
@@ -88,6 +98,8 @@ def __exit__(self, exc_type, exc_val, exc_tb):
8898

8999
def close(self) -> None:
90100
self.qmp_client.close()
101+
if self.qga_client:
102+
self.qga_client.close()
91103

92104
def query_sev_info(self) -> VmSevInfo:
93105
caps = self.qmp_client.command("query-sev")
@@ -176,6 +188,13 @@ def migrate_cancel(self) -> None:
176188
"""Cancel ongoing migration."""
177189
self.qmp_client.command("migrate_cancel")
178190

191+
def _get_qga_client(self) -> qmp.QEMUMonitorProtocol:
192+
"""Get the QGA client, raising an error if not available."""
193+
if not self.qga_client:
194+
msg = "QEMU Guest Agent socket is not available"
195+
raise Exception(msg)
196+
return self.qga_client
197+
179198
def guest_exec(self, command: str, args: list[str] | None = None, capture_output: bool = True) -> dict:
180199
"""
181200
Execute a command in the guest via qemu-guest-agent.
@@ -185,10 +204,11 @@ def guest_exec(self, command: str, args: list[str] | None = None, capture_output
185204
:param capture_output: Whether to capture stdout/stderr
186205
:return: Dict with 'pid' key for the started process
187206
"""
207+
qga = self._get_qga_client()
188208
exec_args = {"path": command, "capture-output": capture_output}
189209
if args:
190210
exec_args["arg"] = args
191-
return self.qmp_client.command("guest-exec", **exec_args)
211+
return qga.command("guest-exec", **exec_args)
192212

193213
def guest_exec_status(self, pid: int) -> dict:
194214
"""
@@ -197,14 +217,15 @@ def guest_exec_status(self, pid: int) -> dict:
197217
:param pid: The PID returned by guest_exec
198218
:return: Dict with 'exited', 'exitcode', 'out-data', 'err-data' keys
199219
"""
200-
return self.qmp_client.command("guest-exec-status", pid=pid)
220+
qga = self._get_qga_client()
221+
return qga.command("guest-exec-status", pid=pid)
201222

202223
def reconfigure_guest_network(
203224
self,
204225
new_ip: str,
205226
gateway: str,
206227
nameservers: list[str],
207-
interface: str = "ens3",
228+
interface: str = "eth0",
208229
) -> dict:
209230
"""
210231
Reconfigure guest network via qemu-guest-agent after migration.
@@ -215,35 +236,31 @@ def reconfigure_guest_network(
215236
:param new_ip: New IP address with CIDR notation (e.g., "10.0.0.5/24")
216237
:param gateway: Gateway IP address (e.g., "10.0.0.1")
217238
:param nameservers: List of DNS server IPs (e.g., ["8.8.8.8", "8.8.4.4"])
218-
:param interface: Network interface name (default: "ens3")
239+
:param interface: Network interface name (default: "eth0")
219240
:return: Dict with 'pid' key for the started process
220241
"""
221-
# Build nameservers YAML list
222-
ns_yaml = "\n".join(f" - {ns}" for ns in nameservers)
223-
224-
netplan_config = f"""network:
225-
version: 2
226-
ethernets:
227-
{interface}:
228-
addresses: [{new_ip}]
229-
routes:
230-
- to: default
231-
via: {gateway}
232-
nameservers:
233-
addresses:
234-
{ns_yaml}
235-
"""
242+
network_config = {
243+
"network": {
244+
"version": 2,
245+
"ethernets": {
246+
interface: {
247+
"addresses": [new_ip],
248+
"routes": [{"to": "default", "via": gateway}],
249+
"nameservers": {"addresses": nameservers},
250+
},
251+
},
252+
},
253+
}
254+
netplan_yaml = yaml.safe_dump(network_config, default_flow_style=False, sort_keys=False)
236255

237256
# Create a script that writes the netplan config and applies it
238257
# Use base64 encoding to avoid escaping issues
239-
import base64
240-
241-
config_b64 = base64.b64encode(netplan_config.encode()).decode()
258+
config_b64 = base64.b64encode(netplan_yaml.encode()).decode()
242259

243260
script = f"""
244-
echo '{config_b64}' | base64 -d > /etc/netplan/50-cloud-init.yaml
245-
netplan apply
246-
"""
261+
echo '{config_b64}' | base64 -d > /etc/netplan/50-cloud-init.yaml
262+
netplan apply
263+
"""
247264

248265
return self.guest_exec("/bin/bash", ["-c", script])
249266

@@ -254,11 +271,12 @@ def wait_for_guest_agent(self, timeout_seconds: int = 60) -> bool:
254271
:param timeout_seconds: Maximum time to wait
255272
:return: True if agent is available, False if timeout
256273
"""
274+
qga = self._get_qga_client()
257275
start_time = time.monotonic()
258276
while time.monotonic() - start_time < timeout_seconds:
259277
try:
260278
# Try to ping the guest agent
261-
self.qmp_client.command("guest-ping")
279+
qga.command("guest-ping")
262280
return True
263281
except Exception:
264282
time.sleep(1)

src/aleph/vm/controllers/qemu/instance.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ async def configure(self, incoming_migration_port: int | None = None):
200200
image_path=image_path,
201201
monitor_socket_path=monitor_socket_path,
202202
qmp_socket_path=self.qmp_socket_path,
203+
qga_socket_path=self.qga_socket_path,
203204
vcpu_count=vcpu_count,
204205
mem_size_mb=mem_size_mb,
205206
interface_name=interface_name,
@@ -236,6 +237,10 @@ def save_controller_configuration(self):
236237
def qmp_socket_path(self) -> Path:
237238
return settings.EXECUTION_ROOT / f"{self.vm_hash}-qmp.socket"
238239

240+
@property
241+
def qga_socket_path(self) -> Path:
242+
return settings.EXECUTION_ROOT / f"{self.vm_hash}-qga.socket"
243+
239244
async def start(self):
240245
# Start via systemd not here
241246
raise NotImplementedError()

src/aleph/vm/hypervisors/qemu/qemuvm.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class QemuVM:
2323
image_path: str
2424
monitor_socket_path: Path
2525
qmp_socket_path: Path
26+
qga_socket_path: Path
2627
vcpu_count: int
2728
mem_size_mb: int
2829
interface_name: str
@@ -45,6 +46,7 @@ def __init__(self, vm_hash, config: QemuVMConfiguration):
4546
self.image_path = config.image_path
4647
self.monitor_socket_path = config.monitor_socket_path
4748
self.qmp_socket_path = config.qmp_socket_path
49+
self.qga_socket_path = config.qga_socket_path
4850
self.vcpu_count = config.vcpu_count
4951
self.mem_size_mb = config.mem_size_mb
5052
self.interface_name = config.interface_name
@@ -91,7 +93,7 @@ async def start(
9193
"-smp",
9294
str(self.vcpu_count),
9395
"-drive",
94-
f"file={self.image_path},media=disk,if=virtio",
96+
f"file={self.image_path},media=disk,if=virtio,file.locking=off",
9597
# To debug you can pass gtk or curses instead
9698
"-display",
9799
"none",
@@ -100,6 +102,13 @@ async def start(
100102
# Listen for commands on this socket
101103
"-monitor",
102104
f"unix:{self.monitor_socket_path},server,nowait",
105+
# Qemu Guest Agent communication channel options
106+
"-device",
107+
"virtio-serial",
108+
"-chardev",
109+
f"socket,path={self.qga_socket_path},server=on,wait=off,id=qga0",
110+
"-device",
111+
"virtserialport,chardev=qga0,name=org.qemu.guest_agent.0",
103112
# Listen for commands on this socket (QMP protocol in JSON). The supervisor uses it to send shutdown or start
104113
# command
105114
"-qmp",

0 commit comments

Comments
 (0)