diff --git a/llamacpp/Makefile b/llamacpp/Makefile
index 6e398a1c..83c11cdb 100644
--- a/llamacpp/Makefile
+++ b/llamacpp/Makefile
@@ -30,6 +30,9 @@ ifeq ($(DETECTED_OS),macOS)
 		-DGGML_NATIVE=OFF \
 		-DGGML_OPENMP=OFF \
 		-DLLAMA_CURL=OFF \
+		-DLLAMA_BUILD_COMMON=ON \
+		-DLLAMA_BUILD_SERVER=ON \
+		-DLLAMA_BUILD_TOOLS=ON \
 		-GNinja \
 		-S $(NATIVE_DIR)
 	@echo "Building..."
@@ -43,6 +46,9 @@ ifeq ($(DETECTED_OS),macOS)
 	rm -rf $(INSTALL_DIR)/lib/cmake
 	rm -rf $(INSTALL_DIR)/lib/pkgconfig
 	rm -rf $(INSTALL_DIR)/include
+	@echo "Fixing rpath..."
+	install_name_tool -delete_rpath "$(CURDIR)/$(BUILD_DIR)/bin" $(INSTALL_DIR)/bin/com.docker.llama-server
+	install_name_tool -add_rpath "@executable_path/../lib" $(INSTALL_DIR)/bin/com.docker.llama-server
 	@echo "Build complete! Binaries are in $(INSTALL_DIR)"
 else ifeq ($(DETECTED_OS),Linux)
 	@echo "Linux build not implemented yet"
@@ -80,16 +86,16 @@ clean:
 	rm -rf $(INSTALL_DIR)
 
 build-dir:
-	@echo "$(shell pwd)/$(BUILD_DIR)"
+	@echo "$(CURDIR)/$(BUILD_DIR)"
 
 install-dir:
-	@echo "$(shell pwd)/$(INSTALL_DIR)"
+	@echo "$(CURDIR)/$(INSTALL_DIR)"
 
 help:
 	@echo "Available targets:"
-	@echo "  build        - Build llama.cpp (macOS only for now)"
-	@echo "  install-deps - Install build dependencies"
-	@echo "  build-dir    - Print build directory path"
-	@echo "  install-dir  - Print install directory path"
-	@echo "  clean        - Clean build artifacts"
-	@echo "  help         - Show this help"
+	@echo "  build         - Build llama.cpp (macOS only for now)"
+	@echo "  install-deps  - Install build dependencies"
+	@echo "  build-dir     - Print build directory path"
+	@echo "  install-dir   - Print install directory path"
+	@echo "  clean         - Clean build artifacts"
+	@echo "  help          - Show this help"
diff --git a/llamacpp/native/CMakeLists.txt b/llamacpp/native/CMakeLists.txt
index 6c587b82..268a1d62 100644
--- a/llamacpp/native/CMakeLists.txt
+++ b/llamacpp/native/CMakeLists.txt
@@ -41,10 +41,8 @@ if (DDLLAMA_BUILD_SERVER)
     add_custom_target(com.docker.llama-server ALL
         DEPENDS "${LLAMA_SERVER_DST}")
 
-    # Install the renamed binary using TARGETS instead of PROGRAMS for better cross-platform support
-    install(TARGETS llama-server
-        RUNTIME DESTINATION bin
-        RENAME "com.docker.llama-server${CMAKE_EXECUTABLE_SUFFIX}")
+    # Install the renamed binary
+    install(PROGRAMS "${LLAMA_SERVER_DST}" DESTINATION bin)
 endif()
 
 if (WIN32 AND DDLLAMA_BUILD_UTILS)
diff --git a/pkg/inference/scheduling/runner.go b/pkg/inference/scheduling/runner.go
index 73ccd762..ea47fd99 100644
--- a/pkg/inference/scheduling/runner.go
+++ b/pkg/inference/scheduling/runner.go
@@ -222,8 +222,9 @@ func (r *runner) wait(ctx context.Context) error {
 		return r.err
 	default:
 	}
-	// Create and execute a request targeting a known-valid endpoint.
-	readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/v1/models", http.NoBody)
+	// Create and execute a request targeting the health endpoint.
+	// Note: /health returns 503 during model loading, 200 when ready.
+	readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody)
 	if err != nil {
 		return fmt.Errorf("readiness request creation failed: %w", err)
 	}