@@ -48,11 +48,13 @@ RUN git clone --depth 1 --branch b8157 https://github.com/ggml-org/llama.cpp.git
     -DBUILD_SHARED_LIBS=ON \
     -DCMAKE_BUILD_TYPE=Release && \
     cmake --build build -j${LLAMA_BUILD_JOBS} && \
-    mkdir -p /output/lib && \
+    mkdir -p /output/lib /output/include && \
     cp build/bin/*.so /output/lib/ 2>/dev/null || true && \
     cp build/src/*.so /output/lib/ 2>/dev/null || true && \
     cp build/ggml/src/*.so /output/lib/ 2>/dev/null || true && \
-    cp build/ggml/src/ggml-cpu/*.so /output/lib/ 2>/dev/null || true
+    cp build/ggml/src/ggml-cpu/*.so /output/lib/ 2>/dev/null || true && \
+    cp include/llama.h /output/include/ && \
+    cp ggml/include/*.h /output/include/
 
 # =============================================================================
 # Stage 3: Go build with Vulkan + localllm (CPU)
@@ -68,7 +70,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libvulkan-dev \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy llama.cpp CPU libraries
+# Copy llama.cpp CPU libraries (before COPY . . for better layer caching)
 COPY --from=llama-builder /output/lib /build/lib/llama/
 
 # Go dependencies
@@ -79,6 +81,11 @@ RUN go mod download
 COPY . .
 COPY --from=ui /ui/dist ./ui/dist
 
+# Overwrite repo headers with ones matching the cloned LLAMA_VERSION (must be AFTER COPY . .)
+# COPY . . would otherwise leave the stale headers committed to the repo,
+# causing a struct size mismatch between the compiled library and the CGo bindings.
+COPY --from=llama-builder /output/include/*.h /build/lib/llama/
+
 # Build with Vulkan + localllm (CPU-based inference for Heimdall)
 RUN COMMIT_HASH=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown") && \
     BUILD_TIME=$(date -u +%Y%m%d-%H%M%S) && \
0 commit comments