| 
# syntax=docker/dockerfile:1
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

# --no-install-recommends keeps the toolchain layer minimal, and the apt
# lists are removed in the same layer so they never persist in a layer
# (hadolint DL3015 / DL3009). Packages sorted alphabetically for diffability.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY . .

# Use the default CUDA archs if not specified; otherwise restrict the build
# to the requested architectures via CMAKE_CUDA_ARCHITECTURES.
# NOTE: when the arch is "default", ${CMAKE_ARGS} expands to nothing below.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-server -j$(nproc)

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# curl is needed by the HEALTHCHECK below; libgomp1 is an OpenMP runtime
# dependency of the build; same lean-install/cleanup discipline as the
# build stage so the runtime image stays small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        libcurl4-openssl-dev \
        libgomp1 && \
    rm -rf /var/lib/apt/lists/*

# The cmake build produces shared libraries, so the server binary must be
# shipped together with libggml/libllama from the build stage.
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/build/bin/llama-server /llama-server

# Must be set to 0.0.0.0 so it can listen to requests from host machine
ENV LLAMA_ARG_HOST=0.0.0.0

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
 | 
0 commit comments