Skip to content

Commit 4849661

Browse files
authored
docker : add CUDA 13.1 image build (ggml-org#18441)
* add updated cuda-new.Dockerfile for Ubuntu 24.04 compatibility * add cuda13 build
1 parent 6e0c8cb commit 4849661

File tree

2 files changed

+119
-11
lines changed

2 files changed

+119
-11
lines changed

.devops/cuda-new.Dockerfile

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
ARG UBUNTU_VERSION=24.04
2+
# This needs to generally match the container host's environment.
3+
ARG CUDA_VERSION=13.1.0
4+
# Target the CUDA build image
5+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6+
7+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8+
9+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10+
11+
# CUDA architecture to build for (defaults to all supported archs)
12+
ARG CUDA_DOCKER_ARCH=default
13+
14+
RUN apt-get update && \
15+
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
16+
17+
WORKDIR /app
18+
19+
COPY . .
20+
21+
# Configure and compile the project with the CUDA backend enabled.
# -DCMAKE_CUDA_ARCHITECTURES is only passed when CUDA_DOCKER_ARCH was
# overridden; with the value "default" CMAKE_ARGS stays unset and no
# architecture flag is given to CMake.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
22+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
23+
fi && \
24+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
25+
cmake --build build --config Release -j$(nproc)
26+
27+
RUN mkdir -p /app/lib && \
28+
find build -name "*.so*" -exec cp -P {} /app/lib \;
29+
30+
RUN mkdir -p /app/full \
31+
&& cp build/bin/* /app/full \
32+
&& cp *.py /app/full \
33+
&& cp -r gguf-py /app/full \
34+
&& cp -r requirements /app/full \
35+
&& cp requirements.txt /app/full \
36+
&& cp .devops/tools.sh /app/full/tools.sh
37+
38+
## Base image
39+
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
40+
41+
# Install the runtime dependencies (libgomp1 and curl) and, in the same layer,
# remove apt metadata, package archives and temporary files so they do not
# end up in the image.
RUN apt-get update \
42+
&& apt-get install -y libgomp1 curl\
43+
&& apt autoremove -y \
44+
&& apt clean -y \
45+
&& rm -rf /tmp/* /var/tmp/* \
46+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
47+
&& find /var/cache -type f -delete
48+
49+
COPY --from=build /app/lib/ /app
50+
51+
### Full
52+
FROM base AS full
53+
54+
COPY --from=build /app/full /app
55+
56+
WORKDIR /app
57+
58+
# Install git and the Python tooling, then the packages from requirements.txt.
# pip runs with --no-cache-dir because the cleanup at the end of this layer
# only covers /tmp, /var/tmp and /var/cache; pip's own download cache would
# otherwise stay in the image.
RUN apt-get update \
59+
&& apt-get install -y \
60+
git \
61+
python3 \
62+
python3-pip \
63+
python3-wheel \
64+
&& pip install --break-system-packages --no-cache-dir --upgrade setuptools \
65+
&& pip install --break-system-packages --no-cache-dir -r requirements.txt \
66+
&& apt autoremove -y \
67+
&& apt clean -y \
68+
&& rm -rf /tmp/* /var/tmp/* \
69+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
70+
&& find /var/cache -type f -delete
71+
72+
73+
ENTRYPOINT ["/app/tools.sh"]
74+
75+
### Light, CLI only
76+
FROM base AS light
77+
78+
# Copy only the CLI binaries from the build stage. With more than one source
# the destination has to be a directory and end with "/".
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app/
79+
80+
WORKDIR /app
81+
82+
ENTRYPOINT [ "/app/llama-cli" ]
83+
84+
### Server, Server only
85+
FROM base AS server
86+
87+
# NOTE(review): presumably read by llama-server as its bind address, so that
# the server accepts connections from outside the container — confirm.
ENV LLAMA_ARG_HOST=0.0.0.0
88+
89+
COPY --from=build /app/full/llama-server /app
90+
91+
WORKDIR /app
92+
93+
# Container health probe: curl -f exits non-zero on an HTTP error status, so
# the container is marked unhealthy when the /health request on port 8080 fails.
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
94+
95+
ENTRYPOINT [ "/app/llama-server" ]

.github/workflows/docker.yml

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ jobs:
4040
# https://github.com/ggml-org/llama.cpp/issues/11888
4141
#- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
4242
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
43-
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
43+
- { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "12.4.0", ubuntu_version: "22.04" }
44+
- { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "13.1.0", ubuntu_version: "24.04" }
4445
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
4546
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" }
4647
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" }
@@ -80,18 +81,21 @@ jobs:
8081
run: |
8182
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
8283
REPO_NAME="${{ github.event.repository.name }}"
84+
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
8385
8486
# list all tags possible
85-
if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
86-
TYPE=""
87-
else
88-
TYPE="-${{ matrix.config.tag }}"
89-
fi
90-
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
91-
CACHETAGS="${PREFIX}buildcache${TYPE}"
92-
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
93-
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
94-
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
87+
tags="${{ matrix.config.tag }}"
88+
for tag in $tags; do
89+
if [[ "$tag" == "cpu" ]]; then
90+
TYPE=""
91+
else
92+
TYPE="-$tag"
93+
fi
94+
CACHETAGS="${PREFIX}buildcache${TYPE}"
95+
FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
96+
LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
97+
SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
98+
done
9599
echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT
96100
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
97101
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
@@ -132,6 +136,9 @@ jobs:
132136
file: ${{ matrix.config.dockerfile }}
133137
target: full
134138
provenance: false
139+
build-args: |
140+
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
141+
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
135142
# using github experimental cache
136143
#cache-from: type=gha
137144
#cache-to: type=gha,mode=max
@@ -154,6 +161,9 @@ jobs:
154161
file: ${{ matrix.config.dockerfile }}
155162
target: light
156163
provenance: false
164+
build-args: |
165+
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
166+
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
157167
# using github experimental cache
158168
#cache-from: type=gha
159169
#cache-to: type=gha,mode=max
@@ -176,6 +186,9 @@ jobs:
176186
file: ${{ matrix.config.dockerfile }}
177187
target: server
178188
provenance: false
189+
build-args: |
190+
${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
191+
${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
179192
# using github experimental cache
180193
#cache-from: type=gha
181194
#cache-to: type=gha,mode=max

0 commit comments

Comments
 (0)