
Commit 0d439d1

Merge pull request #541 from docker/unfork-llamacpp
Remove llama.cpp server fork and build upstream
2 parents a9cba1a + ab75308 commit 0d439d1

19 files changed (+38, -10731 lines)

llamacpp/native/CMakeLists.txt

Lines changed: 30 additions & 9 deletions
@@ -8,22 +8,43 @@ project(
 
 option(DDLLAMA_BUILD_SERVER "Build the DD llama.cpp server executable" ON)
 option(DDLLAMA_BUILD_UTILS "Build utilities, e.g. nv-gpu-info" OFF)
-set(DDLLAMA_PATCH_COMMAND "patch" CACHE STRING "patch command")
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
 if (DDLLAMA_BUILD_SERVER)
-    set(LLAMA_BUILD_COMMON ON)
+    # Build upstream llama.cpp with server enabled
+    # Only set these options if they're not already defined to allow consumers to override
+    if(NOT DEFINED LLAMA_BUILD_COMMON)
+        set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common utils library")
+    endif()
+    if(NOT DEFINED LLAMA_BUILD_TOOLS)
+        set(LLAMA_BUILD_TOOLS ON CACHE BOOL "Build tools")
+    endif()
+    if(NOT DEFINED LLAMA_BUILD_SERVER)
+        set(LLAMA_BUILD_SERVER ON CACHE BOOL "Build server")
+    endif()
     add_subdirectory(vendor/llama.cpp)
-    # Get build info and set version for mtmd just like it's done in llama.cpp/CMakeLists.txt
-    include(vendor/llama.cpp/cmake/build-info.cmake)
-    if (NOT DEFINED LLAMA_BUILD_NUMBER)
-        set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
+
+    # Create custom target to copy llama-server to com.docker.llama-server
+    if (WIN32)
+        set(LLAMA_SERVER_DST "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/com.docker.llama-server.exe")
+    else()
+        set(LLAMA_SERVER_DST "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/com.docker.llama-server")
     endif()
-    set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
-    add_subdirectory(vendor/llama.cpp/tools/mtmd)
-    add_subdirectory(src/server)
+
+    add_custom_command(OUTPUT "${LLAMA_SERVER_DST}"
+        COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:llama-server>" "${LLAMA_SERVER_DST}"
+        DEPENDS llama-server
+        COMMENT "Creating com.docker.llama-server from llama-server"
+    )
+
+    add_custom_target(com.docker.llama-server ALL DEPENDS "${LLAMA_SERVER_DST}")
+
+    # Install the renamed binary using TARGETS instead of PROGRAMS for better cross-platform support
+    install(TARGETS llama-server
+        RUNTIME DESTINATION bin
+        RENAME "com.docker.llama-server${CMAKE_EXECUTABLE_SUFFIX}")
 endif()
 
 if (WIN32 AND DDLLAMA_BUILD_UTILS)
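Because the `LLAMA_BUILD_*` options are now only set when not already defined, a value supplied at configure time (on the `cmake` command line, or by a parent project that adds this directory) takes precedence over the defaults above. A minimal sketch of configuring and building just the renamed server target; the specific flag values shown are illustrative, not required:

```bash
# Configure from llamacpp/native; -D values enter the CMake cache first,
# so the if(NOT DEFINED ...) guards above leave them untouched.
cmake -B build -DDDLLAMA_BUILD_SERVER=ON -DLLAMA_BUILD_COMMON=ON

# Build only the renamed server target; com.docker.llama-server depends on
# llama-server, so the upstream binary is built and then copied into build/bin.
cmake --build build --parallel 8 --config Release --target com.docker.llama-server
```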

llamacpp/native/README.md

Lines changed: 8 additions & 18 deletions
@@ -1,5 +1,7 @@
 # Native llama-server
 
+This project builds the upstream llama.cpp server (`llama-server`) directly from the llama.cpp submodule and renames it to `com.docker.llama-server`.
+
 ## Building
 
     cmake -B build
@@ -15,7 +17,7 @@
 
 This project uses llama.cpp as a git submodule located at `vendor/llama.cpp`, which points to the official llama.cpp repository at https://github.com/ggml-org/llama.cpp.git.
 
-The project applies custom patches to llama.cpp's server implementation (`server.cpp` and `utils.hpp`) to integrate with the Docker model-runner architecture. These patches are maintained in `src/server/server.patch`.
+We build the upstream `llama-server` binary directly without any modifications.
 
 ### Prerequisites
 
@@ -45,32 +47,20 @@ If the submodule is already initialized, this command is safe to run and will en
    popd
    ```
 
-3. **Apply the custom llama-server patch:**
+3. **Build and test:**
 
    ```bash
-   make -C src/server clean
-   make -C src/server
-   ```
-
-   This will:
-   - Clean the previous patched files
-   - Copy the new `server.cpp` and `utils.hpp` from the updated llama.cpp
-   - Apply our custom patches from `src/server/server.patch`
-
-4. **Build and test:**
+   # Build from the native directory
+   cmake -B build
+   cmake --build build --parallel 8 --config Release
 
-   ```bash
-   # Build from the native directory
-   cmake -B build
-   cmake --build build --parallel 8 --config Release
-
    # Test the build
    ./build/bin/com.docker.llama-server --model <path to model>
    ```
 
   Make sure everything builds cleanly without errors.
 
-5. **Commit the submodule update:**
+4. **Commit the submodule update:**
 
   ```bash
   git add vendor/llama.cpp

llamacpp/native/src/server/CMakeLists.txt

Lines changed: 0 additions & 31 deletions
This file was deleted.

llamacpp/native/src/server/Makefile

Lines changed: 0 additions & 18 deletions
This file was deleted.

llamacpp/native/src/server/README.md

Lines changed: 0 additions & 24 deletions
This file was deleted.
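With the fork and the patch step removed, the binary in `build/bin` is the unmodified upstream `llama-server` under a new name, so a quick smoke test can lean on its standard HTTP interface. A rough sketch, assuming a local GGUF model at a placeholder path and the default build output directory:

```bash
# Start the renamed upstream server on a local port (model path is a placeholder).
./build/bin/com.docker.llama-server --model /path/to/model.gguf --port 8080 &

# llama-server serves an OpenAI-compatible HTTP API; /health reports readiness.
curl -s http://localhost:8080/health
```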
