Skip to content

Commit e9cb346

Browse files
committed
refactor: enhance docker mirrored deployment
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent 5b1a48f commit e9cb346

File tree

2 files changed

+53
-4
lines changed

2 files changed

+53
-4
lines changed

gpustack_runtime/deployer/docker.py

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -995,7 +995,23 @@ def _prepare_create(self):
995995
self_container_id,
996996
)
997997
try:
998-
self_container = self._client.containers.get(self_container_id)
998+
if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
999+
# Directly get container by name or ID.
1000+
self_container = self._client.containers.get(self_container_id)
1001+
else:
1002+
# Find containers that match the hostname.
1003+
containers = self._client.containers.list()
1004+
containers = [
1005+
c
1006+
for c in containers
1007+
if c.attrs["Config"].get("Hostname", "") == self_container_id
1008+
]
1009+
if len(containers) != 1:
1010+
msg = f"Container with name {self_container_id} not found"
1011+
raise docker.errors.NotFound(
1012+
msg,
1013+
)
1014+
self_container = containers[0]
9991015
self_image = self_container.image
10001016
except docker.errors.APIError:
10011017
output_log = logger.warning
@@ -1048,7 +1064,7 @@ def _prepare_create(self):
10481064
]
10491065
## - Container customized devices
10501066
mirrored_devices: list[dict[str, Any]] = (
1051-
self_container.attrs["HostConfig"].get("Devices", []) or []
1067+
self_container.attrs["HostConfig"].get("Devices") or []
10521068
)
10531069
if igs := envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES:
10541070
mirrored_devices = [
@@ -1059,7 +1075,17 @@ def _prepare_create(self):
10591075
]
10601076
## - Container customized device requests
10611077
mirrored_device_requests: list[dict[str, Any]] = (
1062-
self_container.attrs["HostConfig"].get("DeviceRequests", []) or []
1078+
self_container.attrs["HostConfig"].get("DeviceRequests") or []
1079+
)
1080+
## - Container capabilities
1081+
mirrored_capabilities: dict[str, list[str]] = {}
1082+
if cap := self_container.attrs["HostConfig"].get("CapAdd"):
1083+
mirrored_capabilities["add"] = cap
1084+
if cap := self_container.attrs["HostConfig"].get("CapDrop"):
1085+
mirrored_capabilities["drop"] = cap
1086+
## - Container group_adds
1087+
mirrored_group_adds: list[str] = (
1088+
self_container.attrs["HostConfig"].get("GroupAdd") or []
10631089
)
10641090

10651091
# Construct mutation function.
@@ -1156,6 +1182,27 @@ def mutate_create_options(create_options: dict[str, Any]) -> dict[str, Any]:
11561182
)
11571183
create_options["device_requests"] = c_device_requests
11581184

1185+
if mirrored_capabilities:
1186+
if "cap_add" in mirrored_capabilities:
1187+
c_cap_add: list[str] = create_options.get("cap_add", [])
1188+
for c_cap in mirrored_capabilities["add"]:
1189+
if c_cap not in c_cap_add:
1190+
c_cap_add.append(c_cap)
1191+
create_options["cap_add"] = c_cap_add
1192+
if "cap_drop" in mirrored_capabilities:
1193+
c_cap_drop: list[str] = create_options.get("cap_drop", [])
1194+
for c_cap in mirrored_capabilities["drop"]:
1195+
if c_cap not in c_cap_drop:
1196+
c_cap_drop.append(c_cap)
1197+
create_options["cap_drop"] = c_cap_drop
1198+
1199+
if mirrored_group_adds:
1200+
c_group_adds: list[str] = create_options.get("group_add", [])
1201+
for c_ga in mirrored_group_adds:
1202+
if c_ga not in c_group_adds:
1203+
c_group_adds.append(c_ga)
1204+
create_options["group_add"] = c_group_adds
1205+
11591206
return create_options
11601207

11611208
self._mutate_create_options = mutate_create_options

gpustack_runtime/envs.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@
4848
it will configure the workload with the same following settings as the deployer:
4949
- Container Runtime (e.g., nvidia, amd, etc.),
5050
- Customized environment variables,
51-
- Customized volume mounts.
51+
- Customized volume mounts,
52+
- Customized devices or device requests,
53+
- Customized capabilities.
5254
Note that without `GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME` configured,
5355
if the deployer fails to retrieve its own settings, it will skip mirrored deployment.
5456
"""

0 commit comments

Comments
 (0)