Skip to content

Commit 1fc024d

Browse files
committed
ci: refactor pack
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent e9b2485 commit 1fc024d

File tree

2 files changed

+55
-25
lines changed

2 files changed

+55
-25
lines changed

.github/workflows/pack.yml

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,32 @@ jobs:
4848
version: "0.8.24"
4949
enable-cache: true
5050
python-version: ${{ env.INPUT_PYTHON_VERSION }}
51-
- name: Build
52-
run: |
53-
make build
5451
- name: Setup QEMU
5552
uses: docker/setup-qemu-action@v3
5653
with:
5754
image: tonistiigi/binfmt:qemu-v9.2.2
5855
platforms: "arm64"
5956
- name: Setup BuildX
57+
id: setup-buildx
6058
uses: docker/setup-buildx-action@v3
6159
with:
6260
driver-opts: |
6361
network=host
6462
env.BUILDKIT_STEP_LOG_MAX_SIZE=-1
6563
env.BUILDKIT_STEP_LOG_MAX_SPEED=-1
64+
- name: Setup BuildX Cache
65+
id: setup-buildx-cache
66+
uses: actions/cache@v4
67+
with:
68+
path: ${{ github.workspace }}/.cache
69+
key: cache-mount-${{ hashFiles('uv.lock') }}
70+
- name: Restore BuildX Cache
71+
uses: reproducible-containers/buildkit-cache-dance@v3
72+
with:
73+
builder: ${{ steps.setup-buildx.outputs.name }}
74+
cache-dir: ${{ github.workspace }}/.cache
75+
dockerfile: ${{ github.workspace }}/pack/Dockerfile
76+
skip-extraction: ${{ steps.setup-buildx-cache.outputs.cache-hit }}
6677
- name: Login DockerHub
6778
if: ${{ github.event_name != 'pull_request' }}
6879
uses: docker/login-action@v3

pack/Dockerfile

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -240,18 +240,33 @@ ARG TARGETPLATFORM
240240
ARG TARGETOS
241241
ARG TARGETARCH
242242

243+
## Update
244+
RUN <<EOF
245+
# Update
246+
247+
# Update PCI IDs database for better device name detection.
248+
curl -o /usr/share/misc/pci.ids https://pci-ids.ucw.cz/v2.2/pci.ids
249+
EOF
250+
243251
## Install
244252

245-
RUN --mount=type=bind,target=/workspace/runtime,rw <<EOF
253+
RUN --mount=type=cache,target=/root/.cache/uv \
254+
--mount=type=bind,target=/workspace/runtime,rw <<EOF
246255
# Install gpustack-runtime
247-
echo "Installing GPUStack runtime in the system Python environment..."
256+
257+
cd /workspace/runtime
258+
259+
make prepare
260+
248261
export UV_SYSTEM_PYTHON=1
249262
export UV_PRERELEASE=allow
250-
export UV_NO_CACHE=1
251-
uv pip install \
252-
/workspace/runtime
263+
export UV_LINK_MODE=copy
264+
export UV_NO_CACHE=0
265+
uv pip install .
253266
uv pip tree
254267
EOF
268+
ENV GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT="true" \
269+
GPUSTACK_RUNTIME_DOCKER_EPHEMERAL_FILES_DIR="/var/lib/gpustack/cache/gpustack-runtime"
255270

256271
## Entrypoint
257272

@@ -261,31 +276,32 @@ EOF
261276
## Options:
262277
## - Mount /sys from the host to detect the correct devices' PCI info.
263278
## - Mount /opt/rocm from the host to detect the correct ROCm version.
264-
## - Mount /usr/share/misc from the host to detect the correct device name.
265-
## E.g. docker run --privileged --runtime amd gpustack/runtime:main gpustack-runtime detect --format json
279+
## - Mount /usr/share/misc/pci.ids from the host to detect the correct device name.
280+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock gpustack/runtime:main gpustack-runtime detect --format json
266281
RUN --mount=type=bind,from=rocm-base,source=/opt/rocm/share,target=/opt/rocm/share,rw <<EOF
267282
# Reinstall amd-smi
268-
echo "Installing GPUStack runtime in the system Python environment..."
283+
269284
export UV_SYSTEM_PYTHON=1
270285
export UV_PRERELEASE=allow
271-
export UV_NO_CACHE=1
272286
uv pip install --no-build-isolation \
273287
/opt/rocm/share/amd_smi
274288
uv pip tree
275289
EOF
276290
ENV AMD_VISIBLE_DEVICES="0" \
277-
ROCM_HOME="/opt/rocm"
291+
ROCM_HOME="/opt/rocm" \
292+
GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/rocm"
278293

279294
## Activate all Ascend devices detection,
280295
## works with (default) Ascend container runtime and privileged mode.
281296
## See https://gitcode.com/Ascend/mind-cluster/blob/master/component/ascend-common/devmanager/dcmi/dcmi_interface_api.h.
282297
## Options:
283298
## - Mount /sys from the host to detect the correct devices' PCI info.
284299
## - Mount /usr/local/Ascend/ascend-toolkit to detect the correct CANN version and SoC name.
285-
## E.g. docker run --rm -it --privileged gpustack/runtime:main gpustack-runtime detect --format json
300+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock gpustack/runtime:main gpustack-runtime detect --format json
286301
ENV ASCEND_VISIBLE_DEVICES="0" \
287302
ASCEND_HOME_PATH="/usr/local/Ascend/ascend-toolkit/latest" \
288-
LD_LIBRARY_PATH="/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/ascend-toolkit/latest/runtime/lib64:${LD_LIBRARY_PATH}"
303+
LD_LIBRARY_PATH="/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/ascend-toolkit/latest/runtime/lib64:${LD_LIBRARY_PATH}" \
304+
GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/usr/local/Ascend/ascend-toolkit;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
289305

290306
## Activate all Cambricon devices detection,
291307
## works with (default) Cambricon container runtime and privileged mode.
@@ -294,16 +310,17 @@ ENV ASCEND_VISIBLE_DEVICES="0" \
294310
## Options:
295311
## - Mount /sys from the host to detect the correct devices' PCI info.
296312
## - Mount /usr/bin/cnmon to detect the correct devices. [TODO, TBD] maybe we can mount /usr/local/neuware to detect the correct Neuware version further.
297-
## E.g. docker run --rm -it --privileged gpustack/runtime:main gpustack-runtime detect --format json
313+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock gpustack/runtime:main gpustack-runtime detect --format json
298314
ENV CAMBRICON_VISIBLE_DEVICES="0"
299315

300316
## Activate all Hygon devices detection,
301317
## works with (default) Hygon container runtime and privileged mode.
302318
## See https://github.com/Project-HAMi/dcu-dcgm/blob/master/pkg/dcgm/include/rocm_smi.h.
303319
## Options:
304320
## - Mount /sys from the host to detect the correct devices' PCI info.
305-
## E.g. docker run --rm -it --privileged gpustack/runtime:main gpustack-runtime detect --format json
306-
ENV HYGON_VISIBLE_DEVICES="0"
321+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock gpustack/runtime:main gpustack-runtime detect --format json
322+
ENV HYGON_VISIBLE_DEVICES="0" \
323+
GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/dtk;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
307324

308325
## Activate all Iluvatar devices detection,
309326
## works with (default) Iluvatar container runtime and privileged mode.
@@ -312,25 +329,27 @@ ENV HYGON_VISIBLE_DEVICES="0"
312329
## - Mount /sys from the host to detect the correct devices' PCI info.
313330
## - Mount /run/udev from the host to detect the correct devices' udev info.
314331
## - Mount /usr/bin/ixsmi to detect the correct devices. [TODO, TBD] maybe we can mount /usr/local/corex to detect the correct CoreX version further.
315-
## E.g. docker run --rm -it --privileged gpustack/runtime:main gpustack-runtime detect --format json
332+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock gpustack/runtime:main gpustack-runtime detect --format json
316333
ENV ILUVATAR_VISIBLE_DEVICES="0"
317334

318335
## Activate all MetaX devices detection,
319336
## works with (default) MetaX container runtime and privileged mode.
320337
## See https://developer.metax-tech.com/api/client/document/preview/626/k8s/03_component.html#container-runtime.
321-
## Options:
322-
## - Mount /sys from the host to detect the correct devices' PCI info.
338+
## Required:
323339
## - Mount /opt/maca from the host to support device detecting.
324340
## - Mount /opt/mxdriver/ from the host to support device detecting.
325-
## E.g. docker run --rm -it --privileged -v /opt/mxdriver:/opt/mxdriver -v /opt/maca:/opt/maca gpustack/runtime:main gpustack-runtime detect --format json
326-
ENV LD_LIBRARY_PATH="/opt/maca/lib:/opt/maca/ompi/lib:/opt/maca/ucx/lib:/opt/mxdriver/lib:${LD_LIBRARY_PATH}"
341+
## Options:
342+
## - Mount /sys from the host to detect the correct devices' PCI info.
343+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock -v /opt/mxdriver:/opt/mxdriver -v /opt/maca:/opt/maca gpustack/runtime:main gpustack-runtime detect --format json
344+
ENV LD_LIBRARY_PATH="/opt/maca/lib:/opt/maca/ompi/lib:/opt/maca/ucx/lib:/opt/mxdriver/lib:${LD_LIBRARY_PATH}" \
345+
GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES="/opt/maca;${GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT_IGNORE_VOLUMES}"
327346

328347
## Activate all MThread devices detection,
329348
## works with (default) MThread container runtime and privileged mode.
330349
## See https://docs.mthreads.com/cloud-native/cloud-native-doc-online/install_guide.
331350
## Options:
332351
## - Mount /sys from the host to detect the correct devices' PCI info.
333-
## E.g. docker run --rm -it --privileged gpustack/runtime:main gpustack-runtime detect --format json
352+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock gpustack/runtime:main gpustack-runtime detect --format json
334353
ENV MTHREADS_VISIBLE_DEVICES="0" \
335354
MTHREADS_DRIVER_CAPABILITIES="compute,utility"
336355

@@ -339,7 +358,7 @@ ENV MTHREADS_VISIBLE_DEVICES="0" \
339358
## See https://docs.nvidia.com/deploy/nvml-api/nvml-api-reference.html#nvml-api-reference.
340359
## Options:
341360
## - Mount /sys from the host to detect the correct devices' PCI info.
342-
## E.g. docker run --rm -it --privileged gpustack/runtime:main gpustack-runtime detect --format json
361+
## E.g. docker run --rm -it --privileged -v /var/run/docker.sock:/var/run/docker.sock gpustack/runtime:main gpustack-runtime detect --format json
343362
ENV NVIDIA_DISABLE_REQUIRE="true" \
344363
NVIDIA_VISIBLE_DEVICES="0" \
345364
NVIDIA_DRIVER_CAPABILITIES="compute,utility"

0 commit comments

Comments
 (0)