@@ -48,11 +48,13 @@ RUN git clone --depth 1 --branch b8157 https://github.com/ggml-org/llama.cpp.git
     -DBUILD_SHARED_LIBS=ON \
     -DCMAKE_BUILD_TYPE=Release && \
     cmake --build build -j${LLAMA_BUILD_JOBS} && \
-    mkdir -p /output/lib && \
+    mkdir -p /output/lib /output/include && \
     cp build/bin/*.so /output/lib/ 2>/dev/null || true && \
     cp build/src/*.so /output/lib/ 2>/dev/null || true && \
     cp build/ggml/src/*.so /output/lib/ 2>/dev/null || true && \
-    cp build/ggml/src/ggml-cpu/*.so /output/lib/ 2>/dev/null || true
+    cp build/ggml/src/ggml-cpu/*.so /output/lib/ 2>/dev/null || true && \
+    cp include/llama.h /output/include/ && \
+    cp ggml/include/*.h /output/include/
 
 # =============================================================================
 # Stage 3: Go build with Vulkan + localllm (CPU)
@@ -68,7 +70,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libvulkan-dev \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy llama.cpp CPU libraries
+# Copy llama.cpp CPU libraries (before COPY . . for better layer caching)
 COPY --from=llama-builder /output/lib /build/lib/llama/
 
 # Go dependencies
@@ -79,6 +81,11 @@ RUN go mod download
 COPY . .
 COPY --from=ui /ui/dist ./ui/dist
 
+# Overwrite repo headers with ones matching the cloned LLAMA_VERSION (must be AFTER COPY . .)
+# COPY . . would otherwise leave the stale headers committed to the repo,
+# causing a struct size mismatch between the compiled library and the CGo bindings.
+COPY --from=llama-builder /output/include/*.h /build/lib/llama/
+
 # Build with Vulkan + localllm (CPU-based inference for Heimdall)
 RUN COMMIT_HASH=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown") && \
     BUILD_TIME=$(date -u +%Y%m%d-%H%M%S) && \
0 commit comments