From 6c3db6bedf837ff8c9fd54edcbe2f6b7ee08aaf6 Mon Sep 17 00:00:00 2001 From: Dorin Geman Date: Thu, 8 Jan 2026 14:42:58 +0200 Subject: [PATCH 1/6] fix(llamacpp): add missing cmake flags Signed-off-by: Dorin Geman --- llamacpp/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llamacpp/Makefile b/llamacpp/Makefile index 6e398a1ca..4283a5788 100644 --- a/llamacpp/Makefile +++ b/llamacpp/Makefile @@ -30,6 +30,9 @@ ifeq ($(DETECTED_OS),macOS) -DGGML_NATIVE=OFF \ -DGGML_OPENMP=OFF \ -DLLAMA_CURL=OFF \ + -DLLAMA_BUILD_COMMON=ON \ + -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_BUILD_TOOLS=ON \ -GNinja \ -S $(NATIVE_DIR) @echo "Building..." From a8b3c91554e9797b2122ee026bce9ac544c81a94 Mon Sep 17 00:00:00 2001 From: Dorin Geman Date: Thu, 8 Jan 2026 14:48:32 +0200 Subject: [PATCH 2/6] style(llamacpp): use spaces for consistent help output alignment Signed-off-by: Dorin Geman --- llamacpp/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llamacpp/Makefile b/llamacpp/Makefile index 4283a5788..7fffa75aa 100644 --- a/llamacpp/Makefile +++ b/llamacpp/Makefile @@ -90,9 +90,9 @@ install-dir: help: @echo "Available targets:" - @echo " build - Build llama.cpp (macOS only for now)" - @echo " install-deps - Install build dependencies" - @echo " build-dir - Print build directory path" - @echo " install-dir - Print install directory path" - @echo " clean - Clean build artifacts" - @echo " help - Show this help" + @echo " build - Build llama.cpp (macOS only for now)" + @echo " install-deps - Install build dependencies" + @echo " build-dir - Print build directory path" + @echo " install-dir - Print install directory path" + @echo " clean - Clean build artifacts" + @echo " help - Show this help" From aa3c8597fbc8ddca3d397f7fcea8e59d900444f5 Mon Sep 17 00:00:00 2001 From: Dorin Geman Date: Thu, 8 Jan 2026 15:13:46 +0200 Subject: [PATCH 3/6] fix(llamacpp): install renamed binary correctly Signed-off-by: Dorin Geman --- llamacpp/native/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llamacpp/native/CMakeLists.txt b/llamacpp/native/CMakeLists.txt index 6c587b820..268a1d624 100644 --- a/llamacpp/native/CMakeLists.txt +++ b/llamacpp/native/CMakeLists.txt @@ -41,10 +41,8 @@ if (DDLLAMA_BUILD_SERVER) add_custom_target(com.docker.llama-server ALL DEPENDS "${LLAMA_SERVER_DST}") - # Install the renamed binary using TARGETS instead of PROGRAMS for better cross-platform support - install(TARGETS llama-server - RUNTIME DESTINATION bin - RENAME "com.docker.llama-server${CMAKE_EXECUTABLE_SUFFIX}") + # Install the renamed binary + install(PROGRAMS "${LLAMA_SERVER_DST}" DESTINATION bin) endif() if (WIN32 AND DDLLAMA_BUILD_UTILS) From 591986a5befccb617f4627804b2afbc4dc47e012 Mon Sep 17 00:00:00 2001 From: Dorin Geman Date: Thu, 8 Jan 2026 15:32:01 +0200 Subject: [PATCH 4/6] fix(llamacpp): fix rpath for macOS binary Signed-off-by: Dorin Geman --- llamacpp/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llamacpp/Makefile b/llamacpp/Makefile index 7fffa75aa..385a8cc63 100644 --- a/llamacpp/Makefile +++ b/llamacpp/Makefile @@ -46,6 +46,9 @@ ifeq ($(DETECTED_OS),macOS) rm -rf $(INSTALL_DIR)/lib/cmake rm -rf $(INSTALL_DIR)/lib/pkgconfig rm -rf $(INSTALL_DIR)/include + @echo "Fixing rpath..." + install_name_tool -delete_rpath "$(shell pwd)/$(BUILD_DIR)/bin" $(INSTALL_DIR)/bin/com.docker.llama-server + install_name_tool -add_rpath "@executable_path/../lib" $(INSTALL_DIR)/bin/com.docker.llama-server @echo "Build complete! Binaries are in $(INSTALL_DIR)" else ifeq ($(DETECTED_OS),Linux) @echo "Linux build not implemented yet" From 927a4885f1d49d3327ba61339fadae0eb0e8a758 Mon Sep 17 00:00:00 2001 From: Dorin Geman Date: Thu, 8 Jan 2026 15:58:14 +0200 Subject: [PATCH 5/6] fix(scheduling): use /health endpoint for backend readiness checks After PR #541 removed the custom llama.cpp server fork, the /v1/models endpoint returns 200 during model loading (upstream allows it through middleware). This caused the readiness check to pass prematurely before the model was actually ready for inference. Switch to /health which properly returns 503 during loading and 200 only when the backend is fully ready. Signed-off-by: Dorin Geman --- pkg/inference/scheduling/runner.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/inference/scheduling/runner.go b/pkg/inference/scheduling/runner.go index 73ccd7625..ea47fd99a 100644 --- a/pkg/inference/scheduling/runner.go +++ b/pkg/inference/scheduling/runner.go @@ -222,8 +222,9 @@ func (r *runner) wait(ctx context.Context) error { return r.err default: } - // Create and execute a request targeting a known-valid endpoint. - readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/v1/models", http.NoBody) + // Create and execute a request targeting the health endpoint. + // Note: /health returns 503 during model loading, 200 when ready. + readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody) if err != nil { return fmt.Errorf("readiness request creation failed: %w", err) } From 9bf9df11da0347b600b2db8b8ee3824910ec90fb Mon Sep 17 00:00:00 2001 From: Dorin Geman Date: Thu, 8 Jan 2026 16:31:06 +0200 Subject: [PATCH 6/6] chore(llamacpp): use $(CURDIR) instead of $(shell pwd) More efficient as it avoids forking a shell process. Signed-off-by: Dorin Geman --- llamacpp/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llamacpp/Makefile b/llamacpp/Makefile index 385a8cc63..83c11cdb8 100644 --- a/llamacpp/Makefile +++ b/llamacpp/Makefile @@ -47,7 +47,7 @@ ifeq ($(DETECTED_OS),macOS) rm -rf $(INSTALL_DIR)/lib/pkgconfig rm -rf $(INSTALL_DIR)/include @echo "Fixing rpath..." - install_name_tool -delete_rpath "$(shell pwd)/$(BUILD_DIR)/bin" $(INSTALL_DIR)/bin/com.docker.llama-server + install_name_tool -delete_rpath "$(CURDIR)/$(BUILD_DIR)/bin" $(INSTALL_DIR)/bin/com.docker.llama-server install_name_tool -add_rpath "@executable_path/../lib" $(INSTALL_DIR)/bin/com.docker.llama-server @echo "Build complete! Binaries are in $(INSTALL_DIR)" else ifeq ($(DETECTED_OS),Linux) @@ -86,10 +86,10 @@ clean: rm -rf $(INSTALL_DIR) build-dir: - @echo "$(shell pwd)/$(BUILD_DIR)" + @echo "$(CURDIR)/$(BUILD_DIR)" install-dir: - @echo "$(shell pwd)/$(INSTALL_DIR)" + @echo "$(CURDIR)/$(INSTALL_DIR)" help: @echo "Available targets:"