Skip to content

Commit b38b8f1

Browse files
authored
Fix support for containers w/ CUDA 13.0+ (#831)
1 parent 5699247 commit b38b8f1

File tree

7 files changed

+66
-15
lines changed

7 files changed

+66
-15
lines changed

Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ default-members = [
2222
resolver = "2"
2323

2424
[workspace.package]
25-
version = "1.9.0"
25+
version = "1.9.1"
2626
edition = "2021"
2727
authors = ["Olivier Dehaene", "Nicolas Patry", "Alvaro Bartolome"]
2828
homepage = "https://github.com/huggingface/text-embeddings-inference"

Dockerfile-cuda

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ ARG DEFAULT_USE_FLASH_ATTENTION=True
126126
ENV HUGGINGFACE_HUB_CACHE=/data \
127127
PORT=80 \
128128
USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION \
129-
LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH}"
129+
LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
130130

131131
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
132132
ca-certificates \
@@ -135,16 +135,17 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
135135
cuda-compat-12-9 \
136136
&& rm -rf /var/lib/apt/lists/*
137137

138+
COPY --chmod=775 cuda-entrypoint.sh entrypoint.sh
139+
138140
FROM base AS grpc
139141

140142
COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
141143

142-
ENTRYPOINT ["text-embeddings-router"]
144+
ENTRYPOINT ["./entrypoint.sh"]
143145
CMD ["--json-output"]
144146

145147
FROM base
146148

147149
COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
148-
149-
ENTRYPOINT ["text-embeddings-router"]
150+
ENTRYPOINT ["./entrypoint.sh"]
150151
CMD ["--json-output"]

Dockerfile-cuda-all

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ ARG DEFAULT_USE_FLASH_ATTENTION=True
115115
ENV HUGGINGFACE_HUB_CACHE=/data \
116116
PORT=80 \
117117
USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION \
118-
LD_LIBRARY_PATH="/usr/local/cuda/compat:${LD_LIBRARY_PATH}"
118+
LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
119119

120120
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
121121
ca-certificates \

cuda-all-entrypoint.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@ if ! command -v nvidia-smi &>/dev/null; then
55
exit 1
66
fi
77

8+
# NOTE: The image ships the `cuda-compat-12-9` forward-compatibility libraries
# under /usr/local/cuda/compat. They must be on `LD_LIBRARY_PATH` only when the
# host driver supports a CUDA version older than 12.9.1; with a newer driver
# (notably CUDA 13.0+) loading them makes CUDA initialisation fail, so they are
# left out in that case.

# Print the CUDA version advertised in the `nvidia-smi` banner read from stdin
# (e.g. "12.2"), or nothing when the banner carries no "CUDA Version:" label.
# The banner looks like
#   | NVIDIA-SMI <drv>   Driver Version: <drv>   CUDA Version: <cuda>   |
# so the value is located relative to its label instead of by a fixed column:
# the third whitespace-separated column is the *driver* version, not CUDA's.
cuda_version_from_smi() {
    awk '/CUDA Version/ {
        for (i = 1; i <= NF - 2; i++) {
            if ($i == "CUDA" && $(i + 1) == "Version:") {
                print $(i + 2)
                exit
            }
        }
    }'
}

# Print MAJOR*10000 + MINOR*100 + PATCH for a "MAJOR[.MINOR[.PATCH]]" string.
# Anything that is not a dotted numeric version (empty, "N/A", "ERR!", ...) is
# treated as 0.0.0, i.e. "older than everything", so the compat libraries stay
# enabled when the host version cannot be determined.
cuda_version_to_int() {
    local version=$1
    local numeric_re='^[0-9]+(\.[0-9]+){0,2}$'
    local major minor patch

    if ! [[ $version =~ $numeric_re ]]; then
        version=0.0.0
    fi

    IFS='.' read -r major minor patch <<<"$version"

    # `10#` forces base 10 so zero-padded components are not read as octal.
    printf '%d\n' "$((10#${major} * 10000 + 10#${minor:-0} * 100 + 10#${patch:-0}))"
}

if [ -d /usr/local/cuda/compat ]; then
    DRIVER_CUDA=$(nvidia-smi 2>/dev/null | cuda_version_from_smi)
    DRIVER_INT=$(cuda_version_to_int "$DRIVER_CUDA")
    TARGET_INT=$(cuda_version_to_int 12.9.1)

    if [ "$DRIVER_INT" -lt "$TARGET_INT" ]; then
        # Only append the previous value when it is non-empty: a trailing ':'
        # would add the current working directory to the library search path.
        export LD_LIBRARY_PATH="/usr/local/cuda/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    fi
fi
28+
829
compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
930

1031
if [ ${compute_cap} -eq 75 ]; then

cuda-entrypoint.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
#
# Container entrypoint for the CUDA image: decides whether the bundled CUDA
# forward-compatibility libraries must be put on `LD_LIBRARY_PATH`, then
# replaces itself with `text-embeddings-router`, forwarding every argument.

if ! command -v nvidia-smi &>/dev/null; then
    echo "Error: 'nvidia-smi' command not found." >&2
    exit 1
fi

# Print the CUDA version advertised in the `nvidia-smi` banner read from stdin
# (e.g. "12.2"), or nothing when the banner carries no "CUDA Version:" label.
# The banner looks like
#   | NVIDIA-SMI <drv>   Driver Version: <drv>   CUDA Version: <cuda>   |
# so the value is located relative to its label instead of by a fixed column:
# the third whitespace-separated column is the *driver* version, not CUDA's.
cuda_version_from_smi() {
    awk '/CUDA Version/ {
        for (i = 1; i <= NF - 2; i++) {
            if ($i == "CUDA" && $(i + 1) == "Version:") {
                print $(i + 2)
                exit
            }
        }
    }'
}

# Print MAJOR*10000 + MINOR*100 + PATCH for a "MAJOR[.MINOR[.PATCH]]" string.
# Anything that is not a dotted numeric version (empty, "N/A", "ERR!", ...) is
# treated as 0.0.0, i.e. "older than everything", so the compat libraries stay
# enabled when the host version cannot be determined.
cuda_version_to_int() {
    local version=$1
    local numeric_re='^[0-9]+(\.[0-9]+){0,2}$'
    local major minor patch

    if ! [[ $version =~ $numeric_re ]]; then
        version=0.0.0
    fi

    IFS='.' read -r major minor patch <<<"$version"

    # `10#` forces base 10 so zero-padded components are not read as octal.
    printf '%d\n' "$((10#${major} * 10000 + 10#${minor:-0} * 100 + 10#${patch:-0}))"
}

# NOTE: The image ships the `cuda-compat-12-9` forward-compatibility libraries
# under /usr/local/cuda/compat. They must be on `LD_LIBRARY_PATH` only when the
# host driver supports a CUDA version older than 12.9.1; with a newer driver
# (notably CUDA 13.0+) loading them makes CUDA initialisation fail, so they are
# left out in that case.
if [ -d /usr/local/cuda/compat ]; then
    DRIVER_CUDA=$(nvidia-smi 2>/dev/null | cuda_version_from_smi)
    DRIVER_INT=$(cuda_version_to_int "$DRIVER_CUDA")
    TARGET_INT=$(cuda_version_to_int 12.9.1)

    if [ "$DRIVER_INT" -lt "$TARGET_INT" ]; then
        # Only append the previous value when it is non-empty: a trailing ':'
        # would add the current working directory to the library search path.
        export LD_LIBRARY_PATH="/usr/local/cuda/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    fi
fi

# `exec` so the router replaces this shell and receives signals directly.
exec text-embeddings-router "$@"

docs/openapi.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"name": "Apache 2.0",
1111
"url": "https://www.apache.org/licenses/LICENSE-2.0"
1212
},
13-
"version": "1.9.0"
13+
"version": "1.9.1"
1414
},
1515
"paths": {
1616
"/decode": {

0 commit comments

Comments
 (0)