Skip to content

Commit 240c8a8

Browse files
Andres D. Molins authored and aliel committed
Fix: Block confidential VMs from installing the QEMU guest agent tools and prevent them from being migrated.
1 parent 1955c1f commit 240c8a8

3 files changed

Lines changed: 32 additions & 6 deletions

File tree

src/aleph/vm/controllers/qemu/cloudinit.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def get_hostname_from_hash(vm_hash: ItemHash) -> str:
3232
return base64.b32encode(item_hash_binary).decode().strip("=").lower()
3333

3434

35-
def encode_user_data(hostname, ssh_authorized_keys, has_gpu: bool = False) -> bytes:
35+
def encode_user_data(hostname, ssh_authorized_keys, has_gpu: bool = False, install_guest_agent: bool = True) -> bytes:
3636
"""Creates user data configuration file for cloud-init tool"""
3737
config: dict[str, str | bool | list[str] | list[list[str]]] = {
3838
"hostname": hostname,
@@ -41,8 +41,6 @@ def encode_user_data(hostname, ssh_authorized_keys, has_gpu: bool = False) -> by
4141
"ssh_authorized_keys": ssh_authorized_keys,
4242
"resize_rootfs": True,
4343
"package_update": True,
44-
"packages": ["qemu-guest-agent"],
45-
"runcmd": ["systemctl start qemu-guest-agent.service"],
4644
}
4745

4846
# Add kernel boot parameters for GPU instances to speed up PCI enumeration
@@ -68,6 +66,10 @@ def encode_user_data(hostname, ssh_authorized_keys, has_gpu: bool = False) -> by
6866
],
6967
]
7068

69+
if install_guest_agent:
70+
config["packages"] = ["qemu-guest-agent"]
71+
config["runcmd"] = ["systemctl start qemu-guest-agent.service"]
72+
7173
cloud_config_header = "#cloud-config\n"
7274
config_output = yaml.safe_dump(config, default_flow_style=False, sort_keys=False)
7375
content = (cloud_config_header + config_output).encode()
@@ -119,13 +121,14 @@ async def create_cloud_init_drive_image(
119121
route,
120122
ssh_authorized_keys,
121123
has_gpu: bool = False,
124+
install_guest_agent: bool = True,
122125
):
123126
with (
124127
NamedTemporaryFile() as user_data_config_file,
125128
NamedTemporaryFile() as network_config_file,
126129
NamedTemporaryFile() as metadata_config_file,
127130
):
128-
user_data = encode_user_data(hostname, ssh_authorized_keys, has_gpu=has_gpu)
131+
user_data = encode_user_data(hostname, ssh_authorized_keys, has_gpu=has_gpu, install_guest_agent=install_guest_agent)
129132
user_data_config_file.write(user_data)
130133
user_data_config_file.flush()
131134
network_config = create_network_file(ip, ipv6, ipv6_gateway, nameservers, route)
@@ -148,7 +151,7 @@ async def create_cloud_init_drive_image(
148151

149152

150153
class CloudInitMixin(AlephVmControllerInterface):
151-
async def _create_cloud_init_drive(self) -> Drive:
154+
async def _create_cloud_init_drive(self, install_guest_agent: bool = True) -> Drive:
152155
"""Creates the cloud-init volume to configure and set up the VM"""
153156
ssh_authorized_keys = self.resources.message_content.authorized_keys or []
154157
if settings.USE_DEVELOPER_SSH_KEYS:
@@ -178,6 +181,7 @@ async def _create_cloud_init_drive(self) -> Drive:
178181
route,
179182
ssh_authorized_keys,
180183
has_gpu=has_gpu,
184+
install_guest_agent=install_guest_agent,
181185
)
182186

183187
return Drive(

src/aleph/vm/controllers/qemu_confidential/instance.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ async def configure(self, incoming_migration_port: int | None = None):
9191
logger.debug(f"Making Qemu configuration: {self}")
9292
monitor_socket_path = settings.EXECUTION_ROOT / (str(self.vm_id) + "-monitor.socket")
9393

94-
cloud_init_drive = await self._create_cloud_init_drive()
94+
cloud_init_drive = await self._create_cloud_init_drive(install_guest_agent=False)
9595

9696
image_path = str(self.resources.rootfs_path)
9797
firmware_path = str(self.resources.firmware_path)

src/aleph/vm/orchestrator/views/migration.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,17 @@ async def allocate_migration(request: web.Request) -> web.Response:
156156
status=HTTPStatus.BAD_REQUEST,
157157
)
158158

159+
# Reject confidential VMs - they cannot be live-migrated
160+
if message.content.environment.trusted_execution is not None:
161+
return web.json_response(
162+
{
163+
"status": "error",
164+
"error": "Live migration is not supported for confidential VMs",
165+
"vm_hash": str(vm_hash),
166+
},
167+
status=HTTPStatus.BAD_REQUEST,
168+
)
169+
159170
# Create VM prepared for incoming migration
160171
execution = await pool.create_a_vm(
161172
vm_hash=vm_hash,
@@ -247,6 +258,17 @@ async def migration_start(request: web.Request) -> web.Response:
247258
status=HTTPStatus.BAD_REQUEST,
248259
)
249260

261+
# Reject confidential VMs - they cannot be live-migrated
262+
if execution.is_confidential:
263+
return web.json_response(
264+
{
265+
"status": "error",
266+
"error": "Live migration is not supported for confidential VMs",
267+
"vm_hash": str(vm_hash),
268+
},
269+
status=HTTPStatus.BAD_REQUEST,
270+
)
271+
250272
# Check that VM object exists
251273
if not execution.vm:
252274
return web.json_response(

0 commit comments

Comments
 (0)