Skip to content

Commit c198fa7

Browse files
committed
refactor: enhance mirrored deployment self container detection
Signed-off-by: thxCode <[email protected]>
1 parent e72b990 commit c198fa7

File tree

5 files changed

+73
-19
lines changed

5 files changed

+73
-19
lines changed

.github/workflows/pack.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ jobs:
107107
platforms: "linux/amd64,linux/arm64"
108108
build-args: |
109109
PYTHON_VERSION=${{ env.INPUT_PYTHON_VERSION }}
110+
GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS="${{ steps.metadata.outputs.labels }}"
110111
tags: |
111112
${{ steps.metadata.outputs.tags }}
112113
labels: |

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ package:
9494
--buildkitd-flags "--allow-insecure-entitlement=security.insecure --allow-insecure-entitlement=network.host --oci-worker-net=host --oci-worker-gc-keepstorage=204800" \
9595
--bootstrap; \
9696
fi
97+
LABELS=("org.opencontainers.image.source=https://github.com/gpustack/runtime" "org.opencontainers.image.revision=$(GIT_COMMIT)" "org.opencontainers.image.title=runtime" "org.opencontainers.image.version=main" "org.opencontainers.image.url=https://github.com/gpustack/runtime"); \
9798
TAG=$(PACKAGE_NAMESPACE)/$(PACKAGE_REPOSITORY):$(PACKAGE_TAG); \
9899
EXTRA_ARGS=(); \
99100
if [[ "$(PACKAGE_WITH_CACHE)" == "true" ]]; then \
@@ -116,6 +117,7 @@ package:
116117
--ulimit nofile=65536:65536 \
117118
--shm-size 16G \
118119
--progress plain \
120+
--build-arg "GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS=$$(printf "%s\\n" "$${LABELS[@]}")" \
119121
$${EXTRA_ARGS[@]} \
120122
$(SRCDIR); \
121123
set +x

gpustack_runtime/deployer/docker.py

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,30 +1161,14 @@ def _prepare_create(self):
11611161
self_container_id,
11621162
)
11631163
try:
1164-
if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
1165-
# Directly get container by name or ID.
1166-
self_container = self._client.containers.get(self_container_id)
1167-
else:
1168-
# Find containers that matches the hostname.
1169-
containers = self._client.containers.list()
1170-
containers = [
1171-
c
1172-
for c in containers
1173-
if c.attrs["Config"].get("Hostname", "") == self_container_id
1174-
]
1175-
if len(containers) != 1:
1176-
msg = f"Container with name {self_container_id} not found"
1177-
raise docker.errors.NotFound(
1178-
msg,
1179-
)
1180-
self_container = containers[0]
1164+
self_container = self._find_self_container(self_container_id)
11811165
self_image = self_container.image
1182-
except docker.errors.APIError:
1166+
except docker.errors.APIError as e:
11831167
output_log = logger.warning
11841168
if logger.isEnabledFor(logging.DEBUG):
11851169
output_log = logger.exception
11861170
output_log(
1187-
f"Mirrored deployment enabled, but failed to get self Container {self_container_id}, skipping",
1171+
f"Mirrored deployment enabled, but failed to get self Container {self_container_id}, skipping: {e}",
11881172
)
11891173
return
11901174

@@ -1394,6 +1378,55 @@ def mutate_create_options(create_options: dict[str, Any]) -> dict[str, Any]:
13941378
result = result.joinpath(b_subpath.lstrip("/"))
13951379
self._container_ephemeral_files_dir = result
13961380

1381+
def _find_self_container(
    self,
    self_container_id: str,
) -> docker.models.containers.Container:
    """
    Find the container that this process is running in.

    When `GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME` is set, the given value is
    treated as a container name or ID and looked up directly. Otherwise, it is
    treated as a hostname and matched against the listed containers,
    skipping workload containers and, if configured, containers whose labels
    do not match `GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS`.

    Args:
        self_container_id:
            The container name or ID (explicit mode),
            or the container hostname (detection mode).

    Returns:
        The single Docker container that matches.

    Raises:
        docker.errors.NotFound:
            If no container, or more than one container, matches the hostname.
            Errors raised by the Docker client itself are not caught here.

    """
    if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
        # Directly get container by name or ID.
        return self._client.containers.get(self_container_id)

    # Read the filter labels once, instead of on every iteration.
    filter_labels = envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS or {}

    # Find containers that match the hostname.
    containers: list[docker.models.containers.Container] = []
    for c in self._client.containers.list():
        # Ignore workload containers with host network enabled.
        if _LABEL_WORKLOAD in c.labels:
            continue
        # Ignore containers that do not match the hostname.
        if c.attrs["Config"].get("Hostname", "") != self_container_id:
            continue
        # Ignore containers that do not match the filter labels.
        if any(c.labels.get(k) != v for k, v in filter_labels.items()):
            continue
        containers.append(c)

    # Validate found containers: exactly one match is required.
    if not containers:
        # Report the zero-match case as such, rather than as "multiple".
        msg = (
            f"Container with hostname {self_container_id} not found, "
            "please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact container name"
        )
        raise docker.errors.NotFound(msg)
    if len(containers) > 1:
        msg = (
            f"Found multiple Containers with the same hostname {self_container_id}, "
            "please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact container name"
        )
        raise docker.errors.NotFound(msg)

    return containers[0]
1429+
13971430
@_supported
13981431
def _create(self, workload: WorkloadPlan):
13991432
"""

gpustack_runtime/envs.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,12 @@
132132
# Deployer
133133

134134
## Docker
135+
GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS: dict[str, str] | None = None
136+
"""
137+
Filter labels for selecting the mirrored deployer container in Docker.
138+
Only works when `GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME` is not set.
139+
Normally, it should be injected automatically via CI without any manual configuration.
140+
"""
135141
GPUSTACK_RUNTIME_DOCKER_PAUSE_IMAGE: str | None = None
136142
"""
137143
Docker image used for the pause container.
@@ -284,6 +290,12 @@
284290
getenv("GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY", "1"),
285291
),
286292
# Deployer
293+
"GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS": lambda: to_dict(
294+
getenv(
295+
"GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS",
296+
),
297+
sep="\n",
298+
),
287299
"GPUSTACK_RUNTIME_DOCKER_PAUSE_IMAGE": lambda: getenv(
288300
"GPUSTACK_RUNTIME_DOCKER_PAUSE_IMAGE",
289301
"gpustack/runtime:pause",

pack/Dockerfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
# - PYTHON_VERSION: Version of Python to use.
1616
# - GPUSTACK_RUNTIME_BASE_IMAGE: Base image for the gpustack-runtime stage.
1717
# - GPUSTACK_RUNTIME_ROCM_VERSION: Version of ROCm to vendor libraries from, update this if project dependencies has changed.
18+
# - GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS: Newline-separated list of `key=value` labels used to filter containers when detecting the deployer's own container for mirrored deployment.
1819
ARG PYTHON_VERSION=3.11
1920
ARG GPUSTACK_RUNTIME_BASE_IMAGE=runtime
2021
ARG GPUSTACK_RUNTIME_ROCM_VERSION=6.2.4
22+
ARG GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS
2123

2224
FROM ubuntu:22.04@sha256:3c61d3759c2639d4b836d32a2d3c83fa0214e36f195a3421018dbaaf79cbe37f AS runtime
2325
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
@@ -471,5 +473,9 @@ ENV NVIDIA_DISABLE_REQUIRE="true" \
471473
NVIDIA_VISIBLE_DEVICES="all" \
472474
NVIDIA_DRIVER_CAPABILITIES="compute,utility"
473475

476+
ARG GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS
477+
478+
ENV GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT="true" \
479+
GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS="${GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS}"
474480
WORKDIR /
475481
ENTRYPOINT [ "tini", "--" ]

0 commit comments

Comments
 (0)