Skip to content

Commit 2f154d9

Browse files
authored
Update CUDA to 12.9 w/ cuda-compat-12-9 (#828)
1 parent 5cdaee0 commit 2f154d9

File tree

10 files changed

+50
-288
lines changed

10 files changed

+50
-288
lines changed

.github/workflows/matrix.json

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,22 +45,14 @@
4545
"grpc": true,
4646
"dockerfile": "Dockerfile-cuda"
4747
},
48-
{
49-
"name": "turing-ampere-ada-hopper",
50-
"imageNamePrefix": "cuda-",
51-
"runOn": "always",
52-
"sccache": false,
53-
"grpc": false,
54-
"dockerfile": "Dockerfile-cuda-all"
55-
},
5648
{
5749
"name": "blackwell-100",
5850
"imageNamePrefix": "100-",
5951
"runOn": "always",
6052
"sccache": true,
6153
"cudaComputeCap": 100,
6254
"grpc": true,
63-
"dockerfile": "Dockerfile-cuda-blackwell"
55+
"dockerfile": "Dockerfile-cuda"
6456
},
6557
{
6658
"name": "blackwell-120",
@@ -69,15 +61,15 @@
6961
"sccache": true,
7062
"cudaComputeCap": 120,
7163
"grpc": true,
72-
"dockerfile": "Dockerfile-cuda-blackwell"
64+
"dockerfile": "Dockerfile-cuda"
7365
},
7466
{
75-
"name": "blackwell",
76-
"imageNamePrefix": "blackwell-",
67+
"name": "all",
68+
"imageNamePrefix": "cuda-",
7769
"runOn": "always",
7870
"sccache": false,
7971
"grpc": false,
80-
"dockerfile": "Dockerfile-cuda-blackwell-all"
72+
"dockerfile": "Dockerfile-cuda-all"
8173
},
8274
{
8375
"name": "cpu",

Dockerfile-cuda

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM nvidia/cuda:12.6.3-devel-ubuntu24.04 AS base-builder
1+
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS base-builder
22

33
ENV SCCACHE=0.10.0
44
ENV RUSTC_WRAPPER=/usr/local/bin/sccache
@@ -58,6 +58,12 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
5858
elif [ ${CUDA_COMPUTE_CAP} -eq 90 ]; \
5959
then \
6060
nvprune --generate-code code=sm_90 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
61+
elif [ ${CUDA_COMPUTE_CAP} -eq 100 ]; \
62+
then \
63+
nvprune --generate-code code=sm_100 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
64+
elif [ ${CUDA_COMPUTE_CAP} -eq 120 ]; \
65+
then \
66+
nvprune --generate-code code=sm_120 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
6167
else \
6268
echo "cuda compute cap ${CUDA_COMPUTE_CAP} is not supported"; exit 1; \
6369
fi;
@@ -113,7 +119,7 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
113119
cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F grpc --no-default-features && sccache -s; \
114120
fi;
115121

116-
FROM nvidia/cuda:12.6.3-base-ubuntu24.04 AS base
122+
FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS base
117123

118124
ARG DEFAULT_USE_FLASH_ATTENTION=True
119125

@@ -126,7 +132,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
126132
ca-certificates \
127133
libssl-dev \
128134
curl \
129-
cuda-compat-12-6 \
135+
cuda-compat-12-9 \
130136
&& rm -rf /var/lib/apt/lists/*
131137

132138
FROM base AS grpc

Dockerfile-cuda-all

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM nvidia/cuda:12.6.3-devel-ubuntu24.04 AS base-builder
1+
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS base-builder
22

33
ENV SCCACHE=0.10.0
44
ENV RUSTC_WRAPPER=/usr/local/bin/sccache
@@ -64,6 +64,14 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
6464
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
6565
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
6666

67+
RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
68+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
69+
CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
70+
71+
RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
72+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
73+
CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
74+
6775
COPY backends backends
6876
COPY core core
6977
COPY router router
@@ -88,7 +96,19 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
8896

8997
RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90
9098

91-
FROM nvidia/cuda:12.6.3-base-ubuntu24.04 AS base
99+
RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
100+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
101+
CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
102+
103+
RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100
104+
105+
RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
106+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
107+
CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
108+
109+
RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120
110+
111+
FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS base
92112

93113
ARG DEFAULT_USE_FLASH_ATTENTION=True
94114

@@ -101,12 +121,14 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
101121
ca-certificates \
102122
libssl-dev \
103123
curl \
104-
cuda-compat-12-6 \
124+
cuda-compat-12-9 \
105125
&& rm -rf /var/lib/apt/lists/*
106126

107127
COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75
108128
COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80
109129
COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90
130+
COPY --from=builder /usr/src/target/release/text-embeddings-router-100 /usr/local/bin/text-embeddings-router-100
131+
COPY --from=builder /usr/src/target/release/text-embeddings-router-120 /usr/local/bin/text-embeddings-router-120
110132

111133
COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh
112134

Dockerfile-cuda-blackwell

Lines changed: 0 additions & 124 deletions
This file was deleted.

Dockerfile-cuda-blackwell-all

Lines changed: 0 additions & 103 deletions
This file was deleted.

README.md

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -587,23 +587,13 @@ runtime_compute_cap=89
587587
# Example for Hopper (H100, ...)
588588
runtime_compute_cap=90
589589

590-
docker build . -f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
591-
```
592-
593-
If your CUDA device architecture is Blackwell, then you need to run the following
594-
instead, as CUDA 12.9 is required, hence the Dockerfile differs:
595-
596-
```shell
597-
# Get submodule dependencies
598-
git submodule update --init
599-
600590
# Example for Blackwell (B200, GB200, ...)
601591
runtime_compute_cap=100
602592

603593
# Example for Blackwell (GeForce RTX 50X0, RTX PRO 6000, ...)
604594
runtime_compute_cap=120
605595

606-
docker build . -f Dockerfile-cuda-blackwell --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
596+
docker build . -f Dockerfile-cuda --build-arg CUDA_COMPUTE_CAP=$runtime_compute_cap
607597
```
608598

609599
### Apple M1/M2 arm64 architectures

0 commit comments

Comments
 (0)