# syntax=docker/dockerfile:1
# The syntax directive pins the BuildKit frontend; it is required for the
# `RUN --mount=type=cache` features used throughout this file.

ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04
 | 4 | +### Build Llama.cpp stage  | 
 | 5 | +FROM gcc:${GCC_VERSION} AS build  | 
 | 6 | + | 
 | 7 | +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \  | 
 | 8 | +    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \  | 
 | 9 | +    apt update -y && \  | 
 | 10 | +    apt upgrade -y && \  | 
 | 11 | +    apt install -y --no-install-recommends \  | 
 | 12 | +        git cmake ccache ninja-build \  | 
 | 13 | +        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.  | 
 | 14 | +        libopenblas-dev libcurl4-openssl-dev && \  | 
 | 15 | +    rm -rf /var/lib/apt/lists/*  | 
 | 16 | + | 
 | 17 | +WORKDIR /app  | 
 | 18 | +COPY . .  | 
 | 19 | + | 
 | 20 | +RUN --mount=type=cache,target=/root/.ccache \  | 
 | 21 | +    --mount=type=cache,target=/app/build \  | 
 | 22 | +    cmake -S . -B build -G Ninja \  | 
 | 23 | +        -DCMAKE_BUILD_TYPE=Release \  | 
 | 24 | +        -DCMAKE_C_COMPILER_LAUNCHER=ccache \  | 
 | 25 | +        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \  | 
 | 26 | +        -DLLAMA_BUILD_TESTS=OFF \  | 
 | 27 | +        -DGGML_BACKEND_DL=OFF \  | 
 | 28 | +        -DGGML_NATIVE=OFF \  | 
 | 29 | +        -DGGML_BLAS=ON \  | 
 | 30 | +        -DGGML_BLAS_VENDOR=OpenBLAS && \  | 
 | 31 | +    cmake --build build --config Release -j $(nproc) && \  | 
 | 32 | +    cmake --install build --prefix /opt/llama.cpp  | 
 | 33 | + | 
 | 34 | +COPY *.py             /opt/llama.cpp/bin  | 
 | 35 | +COPY .devops/tools.sh /opt/llama.cpp/bin  | 
 | 36 | + | 
 | 37 | +COPY gguf-py          /opt/llama.cpp/gguf-py  | 
 | 38 | +COPY requirements.txt /opt/llama.cpp/gguf-py  | 
 | 39 | +COPY requirements     /opt/llama.cpp/gguf-py/requirements  | 
 | 40 | + | 
 | 41 | + | 
 | 42 | +### Collect all llama.cpp binaries, libraries and distro libraries  | 
 | 43 | +FROM scratch AS collector  | 
 | 44 | + | 
 | 45 | +# Copy llama.cpp binaries and libraries  | 
 | 46 | +COPY --from=build /opt/llama.cpp/bin     /llama.cpp/bin  | 
 | 47 | +COPY --from=build /opt/llama.cpp/lib     /llama.cpp/lib  | 
 | 48 | +COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py  | 
 | 49 | + | 
 | 50 | + | 
 | 51 | +### Base image  | 
 | 52 | +FROM ubuntu:${UBUNTU_VERSION} AS base  | 
 | 53 | + | 
 | 54 | +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \  | 
 | 55 | +    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \  | 
 | 56 | +    apt update -y && \  | 
 | 57 | +    apt install -y --no-install-recommends \  | 
 | 58 | +        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.  | 
 | 59 | +        # See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506  | 
 | 60 | +        curl libgomp1 libopenblas-dev && \  | 
 | 61 | +    apt autoremove -y && \  | 
 | 62 | +    apt clean -y && \  | 
 | 63 | +    rm -rf /tmp/* /var/tmp/* && \  | 
 | 64 | +    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \  | 
 | 65 | +    find /var/cache -type f -delete  | 
 | 66 | + | 
 | 67 | +# Copy llama.cpp libraries  | 
 | 68 | +COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu  | 
 | 69 | + | 
 | 70 | + | 
 | 71 | +### Full  | 
 | 72 | +FROM base AS full  | 
 | 73 | + | 
 | 74 | +ENV PATH="/root/.cargo/bin:${PATH}"  | 
 | 75 | +WORKDIR /app  | 
 | 76 | + | 
 | 77 | +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \  | 
 | 78 | +    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \  | 
 | 79 | +    apt update -y && \  | 
 | 80 | +    apt install -y \  | 
 | 81 | +        git cmake libjpeg-dev \  | 
 | 82 | +        python3 python3-pip python3-dev && \  | 
 | 83 | +    apt autoremove -y && \  | 
 | 84 | +    apt clean -y && \  | 
 | 85 | +    rm -rf /tmp/* /var/tmp/* && \  | 
 | 86 | +    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \  | 
 | 87 | +    find /var/cache -type f -delete  | 
 | 88 | + | 
 | 89 | +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y  | 
 | 90 | + | 
 | 91 | +COPY --from=collector /llama.cpp/bin /app  | 
 | 92 | +COPY --from=collector /llama.cpp/gguf-py /app/gguf-py  | 
 | 93 | + | 
 | 94 | +RUN pip install --no-cache-dir --break-system-packages \  | 
 | 95 | +        -r /app/gguf-py/requirements.txt  | 
 | 96 | + | 
 | 97 | +ENTRYPOINT [ "/app/tools.sh" ]  | 
 | 98 | + | 
 | 99 | + | 
 | 100 | +### CLI Only  | 
 | 101 | +FROM base AS light  | 
 | 102 | + | 
 | 103 | +WORKDIR /llama.cpp/bin  | 
 | 104 | + | 
 | 105 | +# Copy llama.cpp binaries and libraries  | 
 | 106 | +COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin  | 
 | 107 | + | 
 | 108 | +ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]  | 
 | 109 | + | 
 | 110 | + | 
 | 111 | +### Server  | 
 | 112 | +FROM base AS server  | 
 | 113 | + | 
 | 114 | +ENV LLAMA_ARG_HOST=0.0.0.0  | 
 | 115 | + | 
 | 116 | +WORKDIR /llama.cpp/bin  | 
 | 117 | + | 
 | 118 | +# Copy llama.cpp binaries and libraries  | 
 | 119 | +COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin  | 
 | 120 | + | 
 | 121 | +EXPOSE 8080  | 
 | 122 | + | 
 | 123 | +ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]  | 
0 commit comments